diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml
new file mode 100644
index 00000000..b696b926
--- /dev/null
+++ b/.github/workflows/docs.yaml
@@ -0,0 +1,33 @@
+name: docs
+
+on: ["push", "pull_request"]
+
+jobs:
+ docs:
+ # We want to run on external PRs, but not on our own internal PRs as they'll be run
+ # by the push to the branch.
+ if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
+
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: Setup Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.x'
+ cache: "pip"
+ cache-dependency-path: |
+ requirements.txt
+ docs/requirements.txt
+
+ - name: Build
+ run: |
+ pip install -r requirements.txt
+ make cython
+
+ - name: Sphinx Documentation Generator
+ run: |
+ pip install -r docs/requirements.txt
+ make docs
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
new file mode 100644
index 00000000..198cf7b5
--- /dev/null
+++ b/.github/workflows/lint.yaml
@@ -0,0 +1,22 @@
+name: lint
+
+on: ["push", "pull_request"]
+
+jobs:
+ lint:
+ # We want to run on external PRs, but not on our own internal PRs as they'll be run
+ # by the push to the branch.
+ if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
+
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: ruff check
+ run: |
+ pipx run ruff check --diff msgpack/ test/ setup.py
+
+ - name: ruff format
+ run: |
+ pipx run ruff format --diff msgpack/ test/ setup.py
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 00000000..23d221c8
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,61 @@
+name: Run tests
+on:
+ push:
+ branches: [main]
+ pull_request:
+ create:
+
+jobs:
+ test:
+ strategy:
+ matrix:
+ os: ["ubuntu-latest", "windows-latest", "macos-latest"]
+ py: ["3.14-dev", "3.13", "3.12", "3.11", "3.10", "3.9", "3.8"]
+
+ runs-on: ${{ matrix.os }}
+ name: Run test with Python ${{ matrix.py }} on ${{ matrix.os }}
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.py }}
+ allow-prereleases: true
+ cache: "pip"
+
+ - name: Prepare
+ shell: bash
+ run: |
+ pip install -U pip
+ pip install -r requirements.txt pytest
+
+ - name: Build
+ shell: bash
+ run: |
+ make cython
+ pip install .
+
+ - name: Test (C extension)
+ shell: bash
+ run: |
+ pytest -v test
+
+ - name: Test (pure Python fallback)
+ shell: bash
+ run: |
+ MSGPACK_PUREPYTHON=1 pytest -v test
+
+ - name: build packages
+ shell: bash
+ run: |
+ pip install build
+ python -m build
+
+ - name: upload packages
+ uses: actions/upload-artifact@v4
+ with:
+ name: dist-${{ matrix.os }}-${{ matrix.py }}
+ path: dist
diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml
new file mode 100644
index 00000000..686d7dd0
--- /dev/null
+++ b/.github/workflows/wheel.yml
@@ -0,0 +1,88 @@
+name: Build sdist and Wheels
+on:
+ push:
+ branches: [main]
+ release:
+ types:
+ - published
+ workflow_dispatch:
+
+jobs:
+ build_wheels:
+ strategy:
+ matrix:
+ # macos-13 is for intel
+ os: ["ubuntu-24.04", "ubuntu-24.04-arm", "windows-latest", "macos-13", "macos-latest"]
+ runs-on: ${{ matrix.os }}
+ name: Build wheels on ${{ matrix.os }}
+
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-python@v5
+ with:
+ python-version: "3.x"
+ cache: "pip"
+ - name: Cythonize
+ shell: bash
+ run: |
+ pip install -r requirements.txt
+ make cython
+
+ - name: Build
+ uses: pypa/cibuildwheel@v2.23.3
+ env:
+ CIBW_TEST_REQUIRES: "pytest"
+ CIBW_TEST_COMMAND: "pytest {package}/test"
+ CIBW_SKIP: "pp* cp38-macosx_*"
+
+ - name: Build sdist
+ if: runner.os == 'Linux' && runner.arch == 'X64'
+ run: |
+ pip install build
+ python -m build -s -o wheelhouse
+
+ - name: Upload Wheels to artifact
+ uses: actions/upload-artifact@v4
+ with:
+ name: wheels-${{ matrix.os }}
+ path: wheelhouse
+
+ # combine all wheels into one artifact
+ combine_wheels:
+ needs: [build_wheels]
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/download-artifact@v4
+ with:
+ # unpacks all CIBW artifacts into dist/
+ pattern: wheels-*
+ path: dist
+ merge-multiple: true
+
+ - name: Upload Wheels to artifact
+ uses: actions/upload-artifact@v4
+ with:
+ name: wheels-all
+ path: dist
+
+ # https://github.com/pypa/cibuildwheel/blob/main/examples/github-deploy.yml
+ upload_pypi:
+ needs: [build_wheels]
+ runs-on: ubuntu-latest
+ environment: pypi
+ permissions:
+ id-token: write
+ if: github.event_name == 'release' && github.event.action == 'published'
+ # or, alternatively, upload to PyPI on every tag starting with 'v' (remove on: release above to use this)
+ # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
+ steps:
+ - uses: actions/download-artifact@v4
+ with:
+ # unpacks all CIBW artifacts into dist/
+ pattern: wheels-*
+ path: dist
+ merge-multiple: true
+
+ - uses: pypa/gh-action-pypi-publish@release/v1
+ #with:
+ # To test: repository-url: https://test.pypi.org/legacy/
diff --git a/.gitignore b/.gitignore
index 1bd68b49..341be631 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,10 +2,16 @@ MANIFEST
build/*
dist/*
.tox
+.python-version
*.pyc
*.pyo
*.so
*~
msgpack/__version__.py
+msgpack/*.c
msgpack/*.cpp
*.egg-info
+/venv
+/tags
+/docs/_build
+.cache
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
new file mode 100644
index 00000000..88d87182
--- /dev/null
+++ b/.readthedocs.yaml
@@ -0,0 +1,24 @@
+# Read the Docs configuration file for Sphinx projects.
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details.
+
+version: 2
+
+build:
+ os: ubuntu-22.04
+ tools:
+ python: "3.11"
+ apt_packages:
+ - build-essential
+ jobs:
+ pre_install:
+ - pip install -r requirements.txt
+ - make cython
+
+python:
+ install:
+ - method: pip
+ path: .
+ - requirements: docs/requirements.txt
+
+sphinx:
+ configuration: docs/conf.py
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index e536fdcf..00000000
--- a/.travis.yml
+++ /dev/null
@@ -1,15 +0,0 @@
-language: python
-python:
- - 2.7
-
-env:
- - PIP_USE_MIRRORS=true
-
-install:
- - sudo apt-get update -qq
- - sudo apt-get install -q python3.3-dev
- - pip install --use-mirrors tox cython
- - cython --cplus msgpack/_packer.pyx
- - cython --cplus msgpack/_unpacker.pyx
-
-script: "tox && MSGPACK_PUREPYTHON=x tox"
diff --git a/ChangeLog.rst b/ChangeLog.rst
index fe64ff81..418c444c 100644
--- a/ChangeLog.rst
+++ b/ChangeLog.rst
@@ -1,8 +1,418 @@
+1.1.1
+=====
+
+Release Date: 2025-06-13
+
+* No change from 1.1.1rc1.
+
+1.1.1rc1
+========
+
+Release Date: 2025-06-06
+
+* Update Cython to 3.1.1 and cibuildwheel to 2.23.3.
+
+1.1.0
+=====
+
+Release Date: 2024-09-10
+
+* use ``PyLong_*`` instead of ``PyInt_*`` for compatibility with
+ future Cython. (#620)
+
+1.1.0rc2
+========
+
+Release Date: 2024-08-19
+
+* Update Cython to 3.0.11 for better Python 3.13 support.
+* Update cibuildwheel to 2.20.0 to build Python 3.13 wheels.
+
+1.1.0rc1
+========
+
+Release Date: 2024-05-07
+
+* Update Cython to 3.0.10 to reduce C warnings and future support for Python 3.13.
+* Stop using C++ mode in Cython to reduce compile error on some compilers.
+* ``Packer()`` has ``buf_size`` option to specify initial size of
+ internal buffer to reduce reallocation.
+* The default internal buffer size of ``Packer()`` is reduced from
+ 1MiB to 256KiB to optimize for common use cases. Use ``buf_size``
+ if you are packing large data.
+* ``Timestamp.to_datetime()`` and ``Timestamp.from_datetime()`` become
+ more accurate by avoiding floating point calculations. (#591)
+* The Cython code for ``Unpacker`` has been slightly rewritten for maintainability.
+* The fallback implementation of ``Packer()`` and ``Unpacker()`` now uses keyword-only
+ arguments to improve compatibility with the Cython implementation.
+
+1.0.8
+=====
+
+Release Date: 2024-03-01
+
+* Update Cython to 3.0.8. This fixes memory leak when iterating
+ ``Unpacker`` object on Python 3.12.
+* Do not include C/Cython files in binary wheels.
+
+
+1.0.7
+=====
+
+Release Date: 2023-09-28
+
+* Fix build error of extension module on Windows. (#567)
+* ``setup.py`` doesn't skip build error of extension module. (#568)
+
+
+1.0.6
+=====
+
+Release Date: 2023-09-21
+
+.. note::
+ v1.0.6 Wheels for Windows don't contain extension module.
+ Please upgrade to v1.0.7 or newer.
+
+* Add Python 3.12 wheels (#517)
+* Remove Python 2.7, 3.6, and 3.7 support
+
+
+1.0.5
+=====
+
+Release Date: 2023-03-08
+
+* Use ``__BYTE_ORDER__`` instead of ``__BYTE_ORDER`` for portability. (#513, #514)
+* Add Python 3.11 wheels (#517)
+* fallback: Fix packing multidimensional memoryview (#527)
+
+1.0.4
+=====
+
+Release Date: 2022-06-03
+
+* Support Python 3.11 (beta).
+* Don't define `__*_ENDIAN__` macro on Unix. by @methane in https://github.com/msgpack/msgpack-python/pull/495
+* Use PyFloat_Pack8() on Python 3.11a7 by @vstinner in https://github.com/msgpack/msgpack-python/pull/499
+* Fix Unpacker max_buffer_length handling by @methane in https://github.com/msgpack/msgpack-python/pull/506
+
+1.0.3
+=====
+
+Release Date: 2021-11-24 JST
+
+* Fix Docstring (#459)
+* Fix error formatting (#463)
+* Improve error message about strict_map_key (#485)
+
+1.0.2
+=====
+
+* Fix year 2038 problem regression in 1.0.1. (#451)
+
+1.0.1
+=====
+
+* Add Python 3.9 and linux/arm64 wheels. (#439)
+* Fixed Unpacker.tell() after read_bytes() (#426)
+* Fixed unpacking datetime before epoch on Windows (#433)
+* Fixed fallback Packer didn't check DateTime.tzinfo (#434)
+
+1.0.0
+=====
+
+Release Date: 2020-02-17
+
+* Remove Python 2 support from the ``msgpack/_cmsgpack``.
+ ``msgpack/fallback`` still supports Python 2.
+* Remove ``encoding`` option from the Packer and Unpacker.
+* Unpacker: The default value of ``max_buffer_size`` is changed to 100MiB.
+* Unpacker: ``strict_map_key`` is True by default now.
+* Unpacker: String map keys are interned.
+* Drop old buffer protocol support.
+* Support Timestamp type.
+* Support serializing and deserializing ``datetime`` object
+  with tzinfo.
+* Unpacker: ``Fix Unpacker.read_bytes()`` in fallback implementation. (#352)
+
+
+0.6.2
+=====
+
+Release Date: 2019-09-20
+
+* Support Python 3.8.
+* Update Cython to 0.29.13 for support Python 3.8.
+* Some small optimizations.
+
+
+0.6.1
+======
+
+Release Date: 2019-01-25
+
+This release is for mitigating pain caused by v0.6.0 reduced max input limits
+for security reason.
+
+* ``unpackb(data)`` configures ``max_*_len`` options from ``len(data)``,
+ instead of static default sizes.
+
+* ``Unpacker(max_buffer_len=N)`` configures ``max_*_len`` options from ``N``,
+ instead of static default sizes.
+
+* ``max_bin_len``, ``max_str_len``, and ``max_ext_len`` are deprecated.
+ Since this is minor release, it's document only deprecation.
+
+
+0.6.0
+======
+
+Release Date: 2018-11-30
+
+This release contains some backward incompatible changes for security reason (DoS).
+
+Important changes
+-----------------
+
+* unpacker: Default value of input limits are smaller than before to avoid DoS attack.
+ If you need to handle large data, you need to specify limits manually. (#319)
+
+* Unpacker doesn't wrap underlying ``ValueError`` (including ``UnicodeError``) into
+ ``UnpackValueError``. If you want to catch all exception during unpack, you need
+ to use ``try ... except Exception`` with minimum try code block. (#323, #233)
+
+* ``PackValueError`` and ``PackOverflowError`` are also removed. You need to catch
+ normal ``ValueError`` and ``OverflowError``. (#323, #233)
+
+* Unpacker has ``strict_map_key`` option now. When it is true, only bytes and str
+ (unicode in Python 2) are allowed for map keys. It is recommended to avoid
+ hashdos. Default value of this option is False for backward compatibility reason.
+ But it will be changed True in 1.0. (#296, #334)
+
+Other changes
+-------------
+
+* Extension modules are merged. There is ``msgpack._cmsgpack`` instead of
+ ``msgpack._packer`` and ``msgpack._unpacker``. (#314, #328)
+
+* Add ``Unpacker.getbuffer()`` method. (#320)
+
+* unpacker: ``msgpack.StackError`` is raised when input data contains too
+ nested data. (#331)
+
+* unpacker: ``msgpack.FormatError`` is raised when input data is not valid
+ msgpack format. (#331)
+
+
+0.5.6
+======
+
+* Fix fallback.Unpacker.feed() dropped unused data from buffer (#287)
+* Resurrect fallback.unpack() and _unpacker.unpack().
+ They were removed at 0.5.5 but it breaks backward compatibility. (#288, #290)
+
+0.5.5
+======
+
+* Fix memory leak in pure Python Unpacker.feed() (#283)
+* Fix unpack() didn't support `raw` option (#285)
+
+0.5.4
+======
+
+* Undeprecate ``unicode_errors`` option. (#278)
+
+0.5.3
+======
+
+* Fixed regression when passing ``unicode_errors`` to Packer but not ``encoding``. (#277)
+
+0.5.2
+======
+
+* Add ``raw`` option to Unpacker. It is preferred way than ``encoding`` option.
+
+* Packer.pack() reset buffer on exception (#274)
+
+
+0.5.1
+======
+
+* Remove FutureWarning about use_bin_type option (#271)
+
+0.5.0
+======
+
+There are some deprecations. Please read changes carefully.
+
+Changes
+-------
+
+* Drop Python 2.6 and ~3.4 support. Python 2.7 and 3.5+ are supported.
+
+* Deprecate useless custom exceptions. Use ValueError instead of PackValueError,
+ Exception instead of PackException and UnpackException, etc...
+ See msgpack/exceptions.py
+
+* Add *strict_types* option to packer. It can be used to serialize subclass of
+ builtin types. For example, when packing object which type is subclass of dict,
+ ``default()`` is called. ``default()`` is called for tuple too.
+
+* Pure Python implementation supports packing memoryview object.
+
+* Support packing bytearray.
+
+* Add ``Unpacker.tell()``. And ``write_bytes`` option is deprecated.
+
+
+Bugs fixed
+----------
+
+* Fixed zero length raw can't be decoded when encoding is specified. (#236)
+
+
+0.4.8
+=====
+:release date: 2016-07-29
+
+Bugs fixed
+----------
+
+* Calling ext_hook with wrong length. (Only on Windows, maybe. #203)
+
+
+0.4.7
+=====
+:release date: 2016-01-25
+
+Bugs fixed
+----------
+
+* Memory leak when unpack is failed
+
+Changes
+-------
+
+* Reduce compiler warnings while building extension module
+* unpack() now accepts ext_hook argument like Unpacker and unpackb()
+* Update Cython version to 0.23.4
+* default function is called when integer overflow
+
+
+0.4.6
+=====
+:release date: 2015-03-13
+
+Bugs fixed
+----------
+
+* fallback.Unpacker: Fix Data corruption when OutOfData.
+ This bug only affects "Streaming unpacking."
+
+
+0.4.5
+=====
+:release date: 2015-01-25
+
+Incompatible Changes
+--------------------
+
+Changes
+-------
+
+Bugs fixed
+----------
+
+* Fix test failure on pytest 2.3. (by @ktdreyer)
+* Fix typos in ChangeLog. (Thanks to @dmick)
+* Improve README.rst (by @msabramo)
+
+
+0.4.4
+=====
+:release date: 2015-01-09
+
+Incompatible Changes
+--------------------
+
+Changes
+-------
+
+Bugs fixed
+----------
+
+* Fix compile error.
+
+0.4.3
+=====
+:release date: 2015-01-07
+
+Incompatible Changes
+--------------------
+
+Changes
+-------
+
+Bugs fixed
+----------
+
+* Unpacker may unpack wrong uint32 value on 32bit or LLP64 environment. (#101)
+* Build failed on Windows Python 2.7.
+
+0.4.2
+=====
+:release date: 2014-03-26
+
+Incompatible Changes
+--------------------
+
+Changes
+-------
+
+Bugs fixed
+----------
+
+* Unpacker doesn't increment refcount of ExtType hook.
+* Packer raises no exception for inputs doesn't fit to msgpack format.
+
+0.4.1
+=====
+:release date: 2014-02-17
+
+Incompatible Changes
+--------------------
+
+Changes
+-------
+
+* fallback.Unpacker.feed() supports bytearray.
+
+Bugs fixed
+----------
+
+* Unpacker doesn't increment refcount of hooks. Hooks may be GCed while unpacking.
+* Unpacker may read unfilled internal buffer.
+
+0.4.0
+=====
+:release date: 2013-10-21
+
+Incompatible Changes
+--------------------
+
+* Raises TypeError instead of ValueError when packer receives unsupported type.
+
+Changes
+-------
+
+* Support New msgpack spec.
+
+
0.3.0
=====
-:release date: in development
-Inconpatible Changes
+Incompatible Changes
--------------------
* Default value of ``use_list`` is ``True`` for now. (It was ``False`` for 0.2.x)
@@ -25,7 +435,7 @@ Changes
0.2.4
-=======
+=====
:release date: 2012-12-22
Bugs fixed
@@ -34,7 +444,7 @@ Bugs fixed
* Fix SEGV when object_hook or object_pairs_hook raise Exception. (#39)
0.2.3
-=======
+=====
:release date: 2012-12-11
Changes
@@ -42,11 +452,11 @@ Changes
* Warn when use_list is not specified. It's default value will be changed in 0.3.
Bugs fixed
------------
+----------
* Can't pack subclass of dict.
0.2.2
-=======
+=====
:release date: 2012-09-21
Changes
@@ -55,7 +465,7 @@ Changes
object in single precision format.
Bugs fixed
------------
+----------
* ``unpack()`` didn't restores gc state when it called with gc disabled.
``unpack()`` doesn't control gc now instead of restoring gc state collectly.
User can control gc state when gc cause performance issue.
@@ -63,7 +473,7 @@ Bugs fixed
* ``Unpacker``'s ``read_size`` option didn't used.
0.2.1
-=======
+=====
:release date: 2012-08-20
Changes
@@ -71,8 +481,8 @@ Changes
* Add ``max_buffer_size`` parameter to Unpacker. It limits internal buffer size
and allows unpack data from untrusted source safely.
-* Unpacker's buffer reallocation algorithm is less greedy now. It cause perforamce
- derease in rare case but memory efficient and don't allocate than ``max_buffer_size``.
+* Unpacker's buffer reallocation algorithm is less greedy now. It may cause a performance
+  decrease in rare cases but is memory efficient and doesn't allocate more than ``max_buffer_size``.
Bugs fixed
----------
@@ -82,7 +492,7 @@ Bugs fixed
0.2.0
-=======
+=====
:release date: 2012-06-27
Changes
@@ -97,16 +507,16 @@ Bugs fixed
0.1.13
-=======
+======
:release date: 2012-04-21
New
-----
+---
* Don't accept subtype of list and tuple as msgpack list. (Steeve Morin)
It allows customize how it serialized with ``default`` argument.
Bugs fixed
------------
+----------
* Fix wrong error message. (David Wolever)
* Fix memory leak while unpacking when ``object_hook`` or ``list_hook`` is used.
(Steeve Morin)
@@ -118,21 +528,21 @@ Other changes
0.1.12
-=======
+======
:release date: 2011-12-27
Bugs fixed
--------------
+----------
* Re-enable packs/unpacks removed at 0.1.11. It will be removed when 0.2 is released.
0.1.11
-=======
+======
:release date: 2011-12-26
Bugs fixed
--------------
+----------
* Include test code for Python3 to sdist. (Johan Bergström)
* Fix compilation error on MSVC. (davidgaleano)
@@ -150,7 +560,7 @@ New feature
0.1.9
-======
+=====
:release date: 2011-01-29
New feature
@@ -164,16 +574,16 @@ Bugs fixed
* Add MemoryError check.
0.1.8
-======
+=====
:release date: 2011-01-10
New feature
-------------
+-----------
* Support ``loads`` and ``dumps`` aliases for API compatibility with
simplejson and pickle.
* Add *object_hook* and *list_hook* option to unpacker. It allows you to
- hook unpacing mapping type and array type.
+ hook unpacking mapping type and array type.
* Add *default* option to packer. It allows you to pack unsupported types.
@@ -185,13 +595,13 @@ Bugs fixed
0.1.7
-======
+=====
:release date: 2010-11-02
New feature
-------------
+-----------
* Add *object_hook* and *list_hook* option to unpacker. It allows you to
- hook unpacing mapping type and array type.
+ hook unpacking mapping type and array type.
* Add *default* option to packer. It allows you to pack unsupported types.
diff --git a/DEVELOP.md b/DEVELOP.md
new file mode 100644
index 00000000..27adf8c0
--- /dev/null
+++ b/DEVELOP.md
@@ -0,0 +1,17 @@
+# Developer's note
+
+### Build
+
+```
+$ make cython
+```
+
+
+### Test
+
+MessagePack uses `pytest` for testing.
+Run test with following command:
+
+```
+$ make test
+```
diff --git a/MANIFEST.in b/MANIFEST.in
index e1912cac..57d84a4c 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,5 @@
include setup.py
include COPYING
-include README.rst
+include README.md
recursive-include msgpack *.h *.c *.pyx *.cpp
recursive-include test *.py
diff --git a/Makefile b/Makefile
index 3fe278ec..51f3e0ef 100644
--- a/Makefile
+++ b/Makefile
@@ -1,19 +1,59 @@
-.PHONY: test all python3
+PYTHON_SOURCES = msgpack test setup.py
+.PHONY: all
all: cython
python setup.py build_ext -i -f
-doc-serve: all
- cd docs && make serve
+.PHONY: format
+format:
+ ruff format $(PYTHON_SOURCES)
+
+.PHONY: lint
+lint:
+ ruff check $(PYTHON_SOURCES)
+.PHONY: doc
doc:
- cd docs && make zip
+ cd docs && sphinx-build -n -v -W --keep-going -b html -d doctrees . html
+.PHONY: pyupgrade
+pyupgrade:
+ @find $(PYTHON_SOURCES) -name '*.py' -type f -exec pyupgrade --py37-plus '{}' \;
+
+.PHONY: cython
cython:
- cython msgpack/*.pyx
+ cython msgpack/_cmsgpack.pyx
+
+.PHONY: test
+test: cython
+ pip install -e .
+ pytest -v test
+ MSGPACK_PUREPYTHON=1 pytest -v test
+
+.PHONY: serve-doc
+serve-doc: all
+ cd docs && make serve
+
+.PHONY: clean
+clean:
+ rm -rf build
+ rm -f msgpack/_cmsgpack.cpp
+ rm -f msgpack/_cmsgpack.*.so
+ rm -f msgpack/_cmsgpack.*.pyd
+ rm -rf msgpack/__pycache__
+ rm -rf test/__pycache__
+
+.PHONY: update-docker
+update-docker:
+ docker pull quay.io/pypa/manylinux2014_i686
+ docker pull quay.io/pypa/manylinux2014_x86_64
+ docker pull quay.io/pypa/manylinux2014_aarch64
-python3: cython
- python3 setup.py build_ext -i -f
+.PHONY: linux-wheel
+linux-wheel:
+ docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2014_i686 bash docker/buildwheel.sh
+ docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2014_x86_64 bash docker/buildwheel.sh
-test:
- py.test test
+.PHONY: linux-arm64-wheel
+linux-arm64-wheel:
+ docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2014_aarch64 bash docker/buildwheel.sh
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..61a03c60
--- /dev/null
+++ b/README.md
@@ -0,0 +1,242 @@
+# MessagePack for Python
+
+[](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml)
+[](https://msgpack-python.readthedocs.io/en/latest/?badge=latest)
+
+## What's this
+
+[MessagePack](https://msgpack.org/) is an efficient binary serialization format.
+It lets you exchange data among multiple languages like JSON.
+But it's faster and smaller.
+This package provides CPython bindings for reading and writing MessagePack data.
+
+## Install
+
+```
+$ pip install msgpack
+```
+
+### Pure Python implementation
+
+The extension module in msgpack (`msgpack._cmsgpack`) does not support PyPy.
+
+But msgpack provides a pure Python implementation (`msgpack.fallback`) for PyPy.
+
+
+### Windows
+
+When you can't use a binary distribution, you need to install Visual Studio
+or Windows SDK on Windows.
+Without extension, using pure Python implementation on CPython runs slowly.
+
+
+## How to use
+
+### One-shot pack & unpack
+
+Use `packb` for packing and `unpackb` for unpacking.
+msgpack provides `dumps` and `loads` as an alias for compatibility with
+`json` and `pickle`.
+
+`pack` and `dump` packs to a file-like object.
+`unpack` and `load` unpacks from a file-like object.
+
+```pycon
+>>> import msgpack
+>>> msgpack.packb([1, 2, 3])
+'\x93\x01\x02\x03'
+>>> msgpack.unpackb(_)
+[1, 2, 3]
+```
+
+Read the docstring for options.
+
+
+### Streaming unpacking
+
+`Unpacker` is a "streaming unpacker". It unpacks multiple objects from one
+stream (or from bytes provided through its `feed` method).
+
+```py
+import msgpack
+from io import BytesIO
+
+buf = BytesIO()
+for i in range(100):
+ buf.write(msgpack.packb(i))
+
+buf.seek(0)
+
+unpacker = msgpack.Unpacker(buf)
+for unpacked in unpacker:
+ print(unpacked)
+```
+
+
+### Packing/unpacking of custom data type
+
+It is also possible to pack/unpack custom data types. Here is an example for
+`datetime.datetime`.
+
+```py
+import datetime
+import msgpack
+
+useful_dict = {
+ "id": 1,
+ "created": datetime.datetime.now(),
+}
+
+def decode_datetime(obj):
+ if '__datetime__' in obj:
+ obj = datetime.datetime.strptime(obj["as_str"], "%Y%m%dT%H:%M:%S.%f")
+ return obj
+
+def encode_datetime(obj):
+ if isinstance(obj, datetime.datetime):
+ return {'__datetime__': True, 'as_str': obj.strftime("%Y%m%dT%H:%M:%S.%f")}
+ return obj
+
+
+packed_dict = msgpack.packb(useful_dict, default=encode_datetime)
+this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime)
+```
+
+`Unpacker`'s `object_hook` callback receives a dict; the
+`object_pairs_hook` callback may instead be used to receive a list of
+key-value pairs.
+
+NOTE: msgpack can encode datetime with tzinfo into standard ext type for now.
+See `datetime` option in `Packer` docstring.
+
+
+### Extended types
+
+It is also possible to pack/unpack custom data types using the **ext** type.
+
+```pycon
+>>> import msgpack
+>>> import array
+>>> def default(obj):
+... if isinstance(obj, array.array) and obj.typecode == 'd':
+... return msgpack.ExtType(42, obj.tostring())
+... raise TypeError("Unknown type: %r" % (obj,))
+...
+>>> def ext_hook(code, data):
+... if code == 42:
+... a = array.array('d')
+... a.fromstring(data)
+... return a
+... return ExtType(code, data)
+...
+>>> data = array.array('d', [1.2, 3.4])
+>>> packed = msgpack.packb(data, default=default)
+>>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook)
+>>> data == unpacked
+True
+```
+
+
+### Advanced unpacking control
+
+As an alternative to iteration, `Unpacker` objects provide `unpack`,
+`skip`, `read_array_header` and `read_map_header` methods. The former two
+read an entire message from the stream, respectively de-serialising and returning
+the result, or ignoring it. The latter two methods return the number of elements
+in the upcoming container, so that each element in an array, or key-value pair
+in a map, can be unpacked or skipped individually.
+
+
+## Notes
+
+### string and binary type in old msgpack spec
+
+Early versions of msgpack didn't distinguish string and binary types.
+The type for representing both string and binary types was named **raw**.
+
+You can pack into and unpack from this old spec using `use_bin_type=False`
+and `raw=True` options.
+
+```pycon
+>>> import msgpack
+>>> msgpack.unpackb(msgpack.packb([b'spam', 'eggs'], use_bin_type=False), raw=True)
+[b'spam', b'eggs']
+>>> msgpack.unpackb(msgpack.packb([b'spam', 'eggs'], use_bin_type=True), raw=False)
+[b'spam', 'eggs']
+```
+
+### ext type
+
+To use the **ext** type, pass `msgpack.ExtType` object to packer.
+
+```pycon
+>>> import msgpack
+>>> packed = msgpack.packb(msgpack.ExtType(42, b'xyzzy'))
+>>> msgpack.unpackb(packed)
+ExtType(code=42, data='xyzzy')
+```
+
+You can use it with `default` and `ext_hook`. See below.
+
+
+### Security
+
+To unpack data received from an unreliable source, msgpack provides
+two security options.
+
+`max_buffer_size` (default: `100*1024*1024`) limits the internal buffer size.
+It is used to limit the preallocated list size too.
+
+`strict_map_key` (default: `True`) limits the type of map keys to bytes and str.
+While the msgpack spec doesn't limit the types of the map keys,
+there is a risk of hashdos.
+If you need to support other types for map keys, use `strict_map_key=False`.
+
+
+### Performance tips
+
+CPython's GC starts when the number of allocated objects grows.
+This means unpacking may cause useless GC.
+You can use `gc.disable()` when unpacking large message.
+
+List is the default sequence type of Python.
+But tuple is lighter than list.
+You can use `use_list=False` while unpacking when performance is important.
+
+
+## Major breaking changes in the history
+
+### msgpack 0.5
+
+Package name on PyPI was changed from `msgpack-python` to `msgpack` from 0.5.
+
+When upgrading from msgpack-0.4 or earlier, do `pip uninstall msgpack-python` before
+`pip install -U msgpack`.
+
+
+### msgpack 1.0
+
+* Python 2 support
+
+ * The extension module does not support Python 2 anymore.
+ The pure Python implementation (`msgpack.fallback`) is used for Python 2.
+
+ * msgpack 1.0.6 drops official support of Python 2.7, as pip and
+ GitHub Action (setup-python) no longer support Python 2.7.
+
+* Packer
+
+ * Packer uses `use_bin_type=True` by default.
+ Bytes are encoded in bin type in msgpack.
+ * The `encoding` option is removed. UTF-8 is used always.
+
+* Unpacker
+
+ * Unpacker uses `raw=False` by default. It assumes str types are valid UTF-8 string
+ and decode them to Python str (unicode) object.
+ * `encoding` option is removed. You can use `raw=True` to support old format (e.g. unpack into bytes, not str).
+ * Default value of `max_buffer_size` is changed from 0 to 100 MiB to avoid DoS attack.
+ You need to pass `max_buffer_size=0` if you have large but safe data.
+ * Default value of `strict_map_key` is changed to True to avoid hashdos.
+ You need to pass `strict_map_key=False` if you have data which contain map keys
+ which type is not bytes or str.
diff --git a/README.rst b/README.rst
deleted file mode 100644
index da2f0236..00000000
--- a/README.rst
+++ /dev/null
@@ -1,205 +0,0 @@
-=======================
-MessagePack for Python
-=======================
-
-:author: INADA Naoki
-:version: 0.3.0
-:date: 2012-12-07
-
-.. image:: https://secure.travis-ci.org/msgpack/msgpack-python.png
- :target: https://travis-ci.org/#!/msgpack/msgpack-python
-
-What's this
-------------
-
-`MessagePack `_ is a fast, compact binary serialization format, suitable for
-similar data to JSON. This package provides CPython bindings for reading and
-writing MessagePack data.
-
-Install
----------
-You can use ``pip`` or ``easy_install`` to install msgpack::
-
- $ easy_install msgpack-python
- or
- $ pip install msgpack-python
-
-PyPy
-^^^^^
-
-msgpack-python provides pure python implementation.
-PyPy can use this.
-
-Windows
-^^^^^^^
-
-When you can't use binary distribution, you need to install Visual Studio
-or Windows SDK on Windows. (NOTE: Visual C++ Express 2010 doesn't support
-amd64. Windows SDK is recommanded way to build amd64 msgpack without any fee.)
-
-Without extension, using pure python implementation on CPython runs slowly.
-
-Note for msgpack 0.2.x users
-----------------------------
-
-The msgpack 0.3 have some incompatible changes.
-
-The default value of ``use_list`` keyword argument is ``True`` from 0.3.
-You should pass the argument explicitly for backward compatibility.
-
-`Unpacker.unpack()` and some unpack methods now raises `OutOfData`
-instead of `StopIteration`.
-`StopIteration` is used for iterator protocol only.
-
-
-How to use
------------
-
-One-shot pack & unpack
-^^^^^^^^^^^^^^^^^^^^^^
-
-Use ``packb`` for packing and ``unpackb`` for unpacking.
-msgpack provides ``dumps`` and ``loads`` as alias for compatibility with
-``json`` and ``pickle``.
-
-``pack`` and ``dump`` packs to file-like object.
-``unpack`` and ``load`` unpacks from file-like object.
-
-::
-
- >>> import msgpack
- >>> msgpack.packb([1, 2, 3])
- '\x93\x01\x02\x03'
- >>> msgpack.unpackb(_)
- [1, 2, 3]
-
-``unpack`` unpacks msgpack's array to Python's list, but can unpack to tuple::
-
- >>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False)
- (1, 2, 3)
-
-You should always pass the ``use_list`` keyword argument. See performance issues relating to use_list_ below.
-
-Read the docstring for other options.
-
-
-Streaming unpacking
-^^^^^^^^^^^^^^^^^^^
-
-``Unpacker`` is a "streaming unpacker". It unpacks multiple objects from one
-stream (or from bytes provided through its ``feed`` method).
-
-::
-
- import msgpack
- from io import BytesIO
-
- buf = BytesIO()
- for i in range(100):
- buf.write(msgpack.packb(range(i)))
-
- buf.seek(0)
-
- unpacker = msgpack.Unpacker(buf)
- for unpacked in unpacker:
- print unpacked
-
-
-Packing/unpacking of custom data type
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-It is also possible to pack/unpack custom data types. Here is an example for
-``datetime.datetime``.
-
-::
-
- import datetime
-
- import msgpack
-
- useful_dict = {
- "id": 1,
- "created": datetime.datetime.now(),
- }
-
- def decode_datetime(obj):
- if b'__datetime__' in obj:
- obj = datetime.datetime.strptime(obj["as_str"], "%Y%m%dT%H:%M:%S.%f")
- return obj
-
- def encode_datetime(obj):
- if isinstance(obj, datetime.datetime):
- return {'__datetime__': True, 'as_str': obj.strftime("%Y%m%dT%H:%M:%S.%f")}
- return obj
-
-
- packed_dict = msgpack.packb(useful_dict, default=encode_datetime)
- this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime)
-
-``Unpacker``'s ``object_hook`` callback receives a dict; the
-``object_pairs_hook`` callback may instead be used to receive a list of
-key-value pairs.
-
-
-Advanced unpacking control
-^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-As an alternative to iteration, ``Unpacker`` objects provide ``unpack``,
-``skip``, ``read_array_header`` and ``read_map_header`` methods. The former two
-read an entire message from the stream, respectively deserialising and returning
-the result, or ignoring it. The latter two methods return the number of elements
-in the upcoming container, so that each element in an array, or key-value pair
-in a map, can be unpacked or skipped individually.
-
-Warning: these methods raise ``StopIteration`` when called at the end of the
-stream. Unless caught, this may silently break an iteration.
-
-Each of these methods may optionally write the packed data it reads to a
-callback function:
-
-::
-
- from io import BytesIO
-
- def distribute(unpacker, get_worker):
- nelems = unpacker.read_map_header()
- for i in range(nelems):
- # Select a worker for the given key
- key = unpacker.unpack()
- worker = get_worker(key)
-
- # Send the value as a packed message to worker
- bytestream = BytesIO()
- unpacker.skip(bytestream.write)
- worker.send(bytestream.getvalue())
-
-Note about performance
-------------------------
-
-GC
-^^
-
-CPython's GC starts when growing allocated object.
-This means unpacking may cause useless GC.
-You can use ``gc.disable()`` when unpacking large message.
-
-`use_list` option
-^^^^^^^^^^^^^^^^^^
-List is the default sequence type of Python.
-But tuple is lighter than list.
-You can use ``use_list=False`` while unpacking when performance is important.
-
-Python's dict can't use list as key and MessagePack allows array for key of mapping.
-``use_list=False`` allows unpacking such message.
-Another way to unpacking such object is using ``object_pairs_hook``.
-
-
-Test
-----
-MessagePack uses `pytest` for testing.
-Run test with following command:
-
- $ py.test
-
-..
- vim: filetype=rst
diff --git a/ROADMAP.md b/ROADMAP.md
deleted file mode 100644
index 5245cc0b..00000000
--- a/ROADMAP.md
+++ /dev/null
@@ -1,7 +0,0 @@
-0.2 series
-==========
-Improve compatibility to simplejson.
-
-0.3 series
-==========
-Add features msgpack-ruby has.
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 00000000..75f0c541
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,5 @@
+## Security contact information
+
+To report a security vulnerability, please use the
+[Tidelift security contact](https://tidelift.com/security).
+Tidelift will coordinate the fix and disclosure.
\ No newline at end of file
diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
index 14392892..2e778dd2 100644
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@@ -1,6 +1,8 @@
from msgpack import fallback
+
try:
- from msgpack import _unpacker, _packer
+ from msgpack import _cmsgpack
+
has_ext = True
except ImportError:
has_ext = False
@@ -9,24 +11,28 @@
def profile(name, func):
times = timeit.repeat(func, number=1000, repeat=4)
- times = ', '.join(["%8f" % t for t in times])
+ times = ", ".join(["%8f" % t for t in times])
print("%-30s %40s" % (name, times))
def simple(name, data):
if has_ext:
- profile("packing %s (ext)" % name, lambda: _packer.packb(data))
- profile('packing %s (fallback)' % name, lambda: fallback.packb(data))
+ packer = _cmsgpack.Packer()
+ profile("packing %s (ext)" % name, lambda: packer.pack(data))
+ packer = fallback.Packer()
+ profile("packing %s (fallback)" % name, lambda: packer.pack(data))
- data = fallback.packb(data)
+ data = packer.pack(data)
if has_ext:
- profile('unpacking %s (ext)' % name, lambda: _unpacker.unpackb(data))
- profile('unpacking %s (fallback)' % name, lambda: fallback.unpackb(data))
+ profile("unpacking %s (ext)" % name, lambda: _cmsgpack.unpackb(data))
+ profile("unpacking %s (fallback)" % name, lambda: fallback.unpackb(data))
+
def main():
- simple("integers", [7]*10000)
- simple("bytes", [b'x'*n for n in range(100)]*10)
- simple("lists", [[]]*10000)
- simple("dicts", [{}]*10000)
+ simple("integers", [7] * 10000)
+ simple("bytes", [b"x" * n for n in range(100)] * 10)
+ simple("lists", [[]] * 10000)
+ simple("dicts", [{}] * 10000)
+
main()
diff --git a/build_windows.bat b/build_windows.bat
deleted file mode 100644
index a71c0e07..00000000
--- a/build_windows.bat
+++ /dev/null
@@ -1,24 +0,0 @@
-set MSSdk=1
-set DISTUTILS_USE_SDK=1
-
-rem Python27 x86
-rem call "C:\Program Files\Microsoft SDKs\Windows\v6.1\Bin\SetEnv.cmd" /Release /x86 /xp
-call "C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\bin\vcvars32.bat"
-c:\Python27\python setup.py build_ext -f build install
-pause
-
-rem Python27 amd64
-rem call "C:\Program Files\Microsoft SDKs\Windows\v6.1\Bin\SetEnv.cmd" /Release /x64 /xp
-call "C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\bin\vcvars64.bat"
-c:\Python27_amd64\python setup.py build_ext -f build install
-pause
-
-rem Python33 x86
-call "C:\Program Files\Microsoft SDKs\Windows\v7.1\bin\SetEnv.cmd" /Release /x86 /xp
-c:\Python33\python setup.py build_ext -f build install
-pause
-
-rem Python33 amd64
-call "C:\Program Files\Microsoft SDKs\Windows\v7.1\bin\SetEnv.cmd" /Release /x64 /xp
-c:\Python33_amd64\python setup.py build_ext -f build install
-pause
diff --git a/docker/buildwheel.sh b/docker/buildwheel.sh
new file mode 100644
index 00000000..ff34139d
--- /dev/null
+++ b/docker/buildwheel.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+DOCKER_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+source "$DOCKER_DIR/shared.env"
+
+set -e -x
+
+ARCH=`uname -p`
+echo "arch=$ARCH"
+
+ls /opt/python
+
+for V in "${PYTHON_VERSIONS[@]}"; do
+ PYBIN=/opt/python/$V/bin
+ rm -rf build/ # Avoid lib build by narrow Python is used by wide python
+ $PYBIN/python -m build -w
+done
+
+cd dist
+for whl in *.whl; do
+ auditwheel repair "$whl"
+ rm "$whl"
+done
diff --git a/docker/runtests.sh b/docker/runtests.sh
new file mode 100755
index 00000000..fa7e979b
--- /dev/null
+++ b/docker/runtests.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+DOCKER_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+source "$DOCKER_DIR/shared.env"
+
+set -e -x
+
+for V in "${PYTHON_VERSIONS[@]}"; do
+ PYBIN=/opt/python/$V/bin
+ $PYBIN/python setup.py install
+ rm -rf build/ # Avoid lib build by narrow Python is used by wide python
+ $PYBIN/pip install pytest
+ pushd test # prevent importing msgpack package in current directory.
+ $PYBIN/python -c 'import sys; print(hex(sys.maxsize))'
+ $PYBIN/python -c 'from msgpack import _cmsgpack' # Ensure extension is available
+ $PYBIN/pytest -v .
+ popd
+done
diff --git a/docker/shared.env b/docker/shared.env
new file mode 100644
index 00000000..80274ac6
--- /dev/null
+++ b/docker/shared.env
@@ -0,0 +1,7 @@
+PYTHON_VERSIONS=(
+ cp310-cp310
+ cp39-cp39
+ cp38-cp38
+ cp37-cp37m
+ cp36-cp36m
+)
diff --git a/docs/Makefile b/docs/Makefile
index 08696047..831a6a7f 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -153,7 +153,7 @@ doctest:
"results in $(BUILDDIR)/doctest/output.txt."
serve: html
- cd _build/html && python3.3 -m http.server
+ python3 -m http.server -d _build/html
zip: html
cd _build/html && zip -r ../../../msgpack-doc.zip .
diff --git a/docs/_static/README.txt b/docs/_static/README.txt
new file mode 100644
index 00000000..1c70594f
--- /dev/null
+++ b/docs/_static/README.txt
@@ -0,0 +1 @@
+Sphinx will copy the contents of docs/_static/ directory to the build location.
diff --git a/docs/advanced.rst b/docs/advanced.rst
new file mode 100644
index 00000000..38370088
--- /dev/null
+++ b/docs/advanced.rst
@@ -0,0 +1,32 @@
+Advanced usage
+===============
+
+Packer
+------
+
+autoreset
+~~~~~~~~~
+
+When you used ``autoreset=False`` option of :class:`~msgpack.Packer`,
+``pack()`` method doesn't return packed ``bytes``.
+
+You can use :meth:`~msgpack.Packer.bytes` or :meth:`~msgpack.Packer.getbuffer` to
+get packed data.
+
+``bytes()`` returns ``bytes`` object. ``getbuffer()`` returns some bytes-like
+object. It's concrete type is implement detail and it will be changed in future
+versions.
+
+You can reduce temporary bytes object by using ``Unpacker.getbuffer()``.
+
+.. code-block:: python
+
+ packer = Packer(use_bin_type=True, autoreset=False)
+
+ packer.pack([1, 2])
+ packer.pack([3, 4])
+
+ with open('data.bin', 'wb') as f:
+ f.write(packer.getbuffer())
+
+ packer.reset() # reset internal buffer
diff --git a/docs/api.rst b/docs/api.rst
index 50a84c45..f5dfbbd2 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -5,19 +5,19 @@ API reference
.. autofunction:: pack
-:func:`dump` is alias for :func:`pack`
+``dump()`` is an alias for :func:`pack`
.. autofunction:: packb
-:func:`dumps` is alias for :func:`packb`
+``dumps()`` is an alias for :func:`packb`
.. autofunction:: unpack
-:func:`load` is alias for :func:`unpack`
+``load()`` is an alias for :func:`unpack`
.. autofunction:: unpackb
-:func:`loads` is alias for :func:`unpackb`
+``loads()`` is an alias for :func:`unpackb`
.. autoclass:: Packer
:members:
@@ -25,8 +25,14 @@ API reference
.. autoclass:: Unpacker
:members:
+.. autoclass:: ExtType
+
+.. autoclass:: Timestamp
+ :members:
+ :special-members: __init__
+
exceptions
------------
+----------
These exceptions are accessible via `msgpack` package.
(For example, `msgpack.OutOfData` is shortcut for `msgpack.exceptions.OutOfData`)
diff --git a/docs/conf.py b/docs/conf.py
index fba09b72..28116cd6 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-#
# msgpack documentation build configuration file, created by
# sphinx-quickstart on Sun Feb 24 14:20:50 2013.
#
@@ -11,37 +9,37 @@
# All configuration values have a default; values that are commented out
# serve to show the default.
-import sys, os
-
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
-#sys.path.insert(0, os.path.abspath('.'))
+# import os
+# import sys
+# sys.path.insert(0, os.path.abspath('..'))
# -- General configuration -----------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
-#needs_sphinx = '1.0'
+# needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode']
+extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode"]
# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
# The suffix of source filenames.
-source_suffix = '.rst'
+source_suffix = ".rst"
# The encoding of source files.
-#source_encoding = 'utf-8-sig'
+# source_encoding = 'utf-8-sig'
# The master toctree document.
-master_doc = 'index'
+master_doc = "index"
# General information about the project.
-project = u'msgpack'
-copyright = u'2013, INADA Naoki'
+project = "msgpack"
+copyright = "Inada Naoki"
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
@@ -49,176 +47,170 @@
#
# The short X.Y version.
# The full version, including alpha/beta/rc tags.
-version = release = '0.3'
+version = release = "1.0"
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
-#language = None
+# language = None
# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
-#today = ''
+# today = ''
# Else, today_fmt is used as the format for a strftime call.
-#today_fmt = '%B %d, %Y'
+# today_fmt = '%B %d, %Y'
today_fmt = "%Y-%m-%d"
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
-exclude_patterns = ['_build']
+exclude_patterns = ["_build"]
# The reST default role (used for this markup: `text`) to use for all documents.
-#default_role = None
+# default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
-#add_function_parentheses = True
+# add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
-#add_module_names = True
+# add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
-#show_authors = False
+# show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+pygments_style = "sphinx"
# A list of ignored prefixes for module index sorting.
-#modindex_common_prefix = []
+# modindex_common_prefix = []
# -- Options for HTML output ---------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
-html_theme = 'sphinxdoc'
+html_theme = "sphinx_rtd_theme"
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
-#html_theme_options = {}
+# html_theme_options = {}
# Add any paths that contain custom themes here, relative to this directory.
-#html_theme_path = []
+# html_theme_path = []
# The name for this set of Sphinx documents. If None, it defaults to
# " v documentation".
-#html_title = None
+# html_title = None
# A shorter title for the navigation bar. Default is the same as html_title.
-#html_short_title = None
+# html_short_title = None
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
-#html_logo = None
+# html_logo = None
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
-#html_favicon = None
+# html_favicon = None
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
-#html_last_updated_fmt = '%b %d, %Y'
+# html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
-#html_use_smartypants = True
+# html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
-#html_sidebars = {}
+# html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names to
# template names.
-#html_additional_pages = {}
+# html_additional_pages = {}
# If false, no module index is generated.
-#html_domain_indices = True
+# html_domain_indices = True
# If false, no index is generated.
-#html_use_index = True
+# html_use_index = True
# If true, the index is split into individual pages for each letter.
-#html_split_index = False
+# html_split_index = False
# If true, links to the reST sources are added to the pages.
-#html_show_sourcelink = True
+# html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
-#html_show_sphinx = True
+# html_show_sphinx = True
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
-#html_show_copyright = True
+# html_show_copyright = True
# If true, an OpenSearch description file will be output, and all pages will
# contain a tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
-#html_use_opensearch = ''
+# html_use_opensearch = ''
# This is the file name suffix for HTML files (e.g. ".xhtml").
-#html_file_suffix = None
+# html_file_suffix = None
# Output file base name for HTML help builder.
-htmlhelp_basename = 'msgpackdoc'
+htmlhelp_basename = "msgpackdoc"
# -- Options for LaTeX output --------------------------------------------------
latex_elements = {
-# The paper size ('letterpaper' or 'a4paper').
-#'papersize': 'letterpaper',
-
-# The font size ('10pt', '11pt' or '12pt').
-#'pointsize': '10pt',
-
-# Additional stuff for the LaTeX preamble.
-#'preamble': '',
+ # The paper size ('letterpaper' or 'a4paper').
+ #'papersize': 'letterpaper',
+ # The font size ('10pt', '11pt' or '12pt').
+ #'pointsize': '10pt',
+ # Additional stuff for the LaTeX preamble.
+ #'preamble': '',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
- ('index', 'msgpack.tex', u'msgpack Documentation',
- u'Author', 'manual'),
+ ("index", "msgpack.tex", "msgpack Documentation", "Author", "manual"),
]
# The name of an image file (relative to this directory) to place at the top of
# the title page.
-#latex_logo = None
+# latex_logo = None
# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
-#latex_use_parts = False
+# latex_use_parts = False
# If true, show page references after internal links.
-#latex_show_pagerefs = False
+# latex_show_pagerefs = False
# If true, show URL addresses after external links.
-#latex_show_urls = False
+# latex_show_urls = False
# Documents to append as an appendix to all manuals.
-#latex_appendices = []
+# latex_appendices = []
# If false, no module index is generated.
-#latex_domain_indices = True
+# latex_domain_indices = True
# -- Options for manual page output --------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
-man_pages = [
- ('index', 'msgpack', u'msgpack Documentation',
- [u'Author'], 1)
-]
+man_pages = [("index", "msgpack", "msgpack Documentation", ["Author"], 1)]
# If true, show URL addresses after external links.
-#man_show_urls = False
+# man_show_urls = False
# -- Options for Texinfo output ------------------------------------------------
@@ -227,59 +219,65 @@
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
- ('index', 'msgpack', u'msgpack Documentation',
- u'Author', 'msgpack', 'One line description of project.',
- 'Miscellaneous'),
+ (
+ "index",
+ "msgpack",
+ "msgpack Documentation",
+ "Author",
+ "msgpack",
+ "One line description of project.",
+ "Miscellaneous",
+ ),
]
# Documents to append as an appendix to all manuals.
-#texinfo_appendices = []
+# texinfo_appendices = []
# If false, no module index is generated.
-#texinfo_domain_indices = True
+# texinfo_domain_indices = True
# How to display URL addresses: 'footnote', 'no', or 'inline'.
-#texinfo_show_urls = 'footnote'
+# texinfo_show_urls = 'footnote'
# -- Options for Epub output ---------------------------------------------------
# Bibliographic Dublin Core info.
-epub_title = u'msgpack'
-epub_author = u'Author'
-epub_publisher = u'Author'
-epub_copyright = u'2013, Author'
+epub_title = "msgpack"
+epub_author = "Author"
+epub_publisher = "Author"
+epub_copyright = "2013, Author"
# The language of the text. It defaults to the language option
# or en if the language is not set.
-#epub_language = ''
+# epub_language = ''
# The scheme of the identifier. Typical schemes are ISBN or URL.
-#epub_scheme = ''
+# epub_scheme = ''
# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
-#epub_identifier = ''
+# epub_identifier = ''
# A unique identification for the text.
-#epub_uid = ''
+# epub_uid = ''
# A tuple containing the cover image and cover page html template filenames.
-#epub_cover = ()
+# epub_cover = ()
# HTML files that should be inserted before the pages created by sphinx.
# The format is a list of tuples containing the path and title.
-#epub_pre_files = []
+# epub_pre_files = []
# HTML files shat should be inserted after the pages created by sphinx.
# The format is a list of tuples containing the path and title.
-#epub_post_files = []
+# epub_post_files = []
# A list of files that should not be packed into the epub file.
-#epub_exclude_files = []
+# epub_exclude_files = []
# The depth of the table of contents in toc.ncx.
-#epub_tocdepth = 3
+# epub_tocdepth = 3
# Allow duplicate toc entries.
-#epub_tocdup = True
+# epub_tocdup = True
diff --git a/docs/index.rst b/docs/index.rst
index 72d4499f..e9c2ce83 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,5 +1,5 @@
msgpack document
-==================
+================
`MessagePack `_ is a efficient format for inter
language data exchange.
@@ -8,3 +8,4 @@ language data exchange.
:maxdepth: 1
api
+ advanced
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 00000000..26002de4
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,2 @@
+sphinx~=7.3.7
+sphinx-rtd-theme~=2.0.0
diff --git a/msgpack/__init__.py b/msgpack/__init__.py
index 77f6b81c..ad68271d 100644
--- a/msgpack/__init__.py
+++ b/msgpack/__init__.py
@@ -1,16 +1,51 @@
-# coding: utf-8
-from msgpack._version import version
-from msgpack.exceptions import *
-
+# ruff: noqa: F401
import os
-if os.environ.get('MSGPACK_PUREPYTHON'):
- from msgpack.fallback import pack, packb, Packer, unpack, unpackb, Unpacker
+
+from .exceptions import * # noqa: F403
+from .ext import ExtType, Timestamp
+
+version = (1, 1, 1)
+__version__ = "1.1.1"
+
+
+if os.environ.get("MSGPACK_PUREPYTHON"):
+ from .fallback import Packer, Unpacker, unpackb
else:
try:
- from msgpack._packer import pack, packb, Packer
- from msgpack._unpacker import unpack, unpackb, Unpacker
+ from ._cmsgpack import Packer, Unpacker, unpackb
except ImportError:
- from msgpack.fallback import pack, packb, Packer, unpack, unpackb, Unpacker
+ from .fallback import Packer, Unpacker, unpackb
+
+
+def pack(o, stream, **kwargs):
+ """
+ Pack object `o` and write it to `stream`
+
+ See :class:`Packer` for options.
+ """
+ packer = Packer(**kwargs)
+ stream.write(packer.pack(o))
+
+
+def packb(o, **kwargs):
+ """
+ Pack object `o` and return packed bytes
+
+ See :class:`Packer` for options.
+ """
+ return Packer(**kwargs).pack(o)
+
+
+def unpack(stream, **kwargs):
+ """
+ Unpack an object from `stream`.
+
+ Raises `ExtraData` when `stream` contains extra bytes.
+ See :class:`Unpacker` for options.
+ """
+ data = stream.read()
+ return unpackb(data, **kwargs)
+
# alias for compatibility to simplejson/marshal/pickle.
load = unpack
@@ -18,4 +53,3 @@
dump = pack
dumps = packb
-
diff --git a/msgpack/_cmsgpack.pyx b/msgpack/_cmsgpack.pyx
new file mode 100644
index 00000000..1faaac3a
--- /dev/null
+++ b/msgpack/_cmsgpack.pyx
@@ -0,0 +1,11 @@
+# coding: utf-8
+#cython: embedsignature=True, c_string_encoding=ascii, language_level=3
+from cpython.datetime cimport import_datetime, datetime_new
+import_datetime()
+
+import datetime
+cdef object utc = datetime.timezone.utc
+cdef object epoch = datetime_new(1970, 1, 1, 0, 0, 0, 0, tz=utc)
+
+include "_packer.pyx"
+include "_unpacker.pyx"
diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx
index 562c92c8..402b6946 100644
--- a/msgpack/_packer.pyx
+++ b/msgpack/_packer.pyx
@@ -1,48 +1,62 @@
# coding: utf-8
-#cython: embedsignature=True
from cpython cimport *
-cdef extern from "Python.h":
- ctypedef char* const_char_ptr "const char*"
- ctypedef char* const_void_ptr "const void*"
- ctypedef struct PyObject
- cdef int PyObject_AsReadBuffer(object o, const_void_ptr* buff, Py_ssize_t* buf_len) except -1
+from cpython.bytearray cimport PyByteArray_Check, PyByteArray_CheckExact
+from cpython.datetime cimport (
+ PyDateTime_CheckExact, PyDelta_CheckExact,
+ datetime_tzinfo, timedelta_days, timedelta_seconds, timedelta_microseconds,
+)
+
+cdef ExtType
+cdef Timestamp
-from libc.stdlib cimport *
-from libc.string cimport *
-from libc.limits cimport *
+from .ext import ExtType, Timestamp
-from msgpack.exceptions import PackValueError
+
+cdef extern from "Python.h":
+
+ int PyMemoryView_Check(object obj)
cdef extern from "pack.h":
struct msgpack_packer:
char* buf
size_t length
size_t buf_size
+ bint use_bin_type
+
+ int msgpack_pack_nil(msgpack_packer* pk) except -1
+ int msgpack_pack_true(msgpack_packer* pk) except -1
+ int msgpack_pack_false(msgpack_packer* pk) except -1
+ int msgpack_pack_long_long(msgpack_packer* pk, long long d) except -1
+ int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) except -1
+ int msgpack_pack_float(msgpack_packer* pk, float d) except -1
+ int msgpack_pack_double(msgpack_packer* pk, double d) except -1
+ int msgpack_pack_array(msgpack_packer* pk, size_t l) except -1
+ int msgpack_pack_map(msgpack_packer* pk, size_t l) except -1
+ int msgpack_pack_raw(msgpack_packer* pk, size_t l) except -1
+ int msgpack_pack_bin(msgpack_packer* pk, size_t l) except -1
+ int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) except -1
+ int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) except -1
+ int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds) except -1
- int msgpack_pack_int(msgpack_packer* pk, int d)
- int msgpack_pack_nil(msgpack_packer* pk)
- int msgpack_pack_true(msgpack_packer* pk)
- int msgpack_pack_false(msgpack_packer* pk)
- int msgpack_pack_long(msgpack_packer* pk, long d)
- int msgpack_pack_long_long(msgpack_packer* pk, long long d)
- int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d)
- int msgpack_pack_float(msgpack_packer* pk, float d)
- int msgpack_pack_double(msgpack_packer* pk, double d)
- int msgpack_pack_array(msgpack_packer* pk, size_t l)
- int msgpack_pack_map(msgpack_packer* pk, size_t l)
- int msgpack_pack_raw(msgpack_packer* pk, size_t l)
- int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l)
cdef int DEFAULT_RECURSE_LIMIT=511
+cdef long long ITEM_LIMIT = (2**32)-1
+
+cdef inline int PyBytesLike_Check(object o):
+ return PyBytes_Check(o) or PyByteArray_Check(o)
-cdef class Packer(object):
+cdef inline int PyBytesLike_CheckExact(object o):
+ return PyBytes_CheckExact(o) or PyByteArray_CheckExact(o)
+
+
+cdef class Packer:
"""
MessagePack Packer
- usage::
+ Usage::
packer = Packer()
astream.write(packer.pack(a))
@@ -50,172 +64,248 @@ cdef class Packer(object):
Packer's constructor has some keyword arguments:
- :param callable default:
+ :param default:
+ When specified, it should be callable.
Convert user type to builtin type that Packer supports.
See also simplejson's document.
- :param str encoding:
- Convert unicode to bytes with this encoding. (default: 'utf-8')
- :param str unicode_erros:
- Error handler for encoding unicode. (default: 'strict')
+
:param bool use_single_float:
Use single precision float type for float. (default: False)
+
:param bool autoreset:
- Reset buffer after each pack and return it's content as `bytes`. (default: True).
+ Reset buffer after each pack and return its content as `bytes`. (default: True).
If set this to false, use `bytes()` to get content and `.reset()` to clear buffer.
+
+ :param bool use_bin_type:
+ Use bin type introduced in msgpack spec 2.0 for bytes.
+ It also enables str8 type for unicode. (default: True)
+
+ :param bool strict_types:
+ If set to true, types will be checked to be exact. Derived classes
+ from serializeable types will not be serialized and will be
+ treated as unsupported type and forwarded to default.
+ Additionally tuples will not be serialized as lists.
+ This is useful when trying to implement accurate serialization
+ for python types.
+
+ :param bool datetime:
+ If set to true, datetime with tzinfo is packed into Timestamp type.
+ Note that the tzinfo is stripped in the timestamp.
+ You can get UTC datetime with `timestamp=3` option of the Unpacker.
+
+ :param str unicode_errors:
+ The error handler for encoding unicode. (default: 'strict')
+ DO NOT USE THIS!! This option is kept for very specific usage.
+
+ :param int buf_size:
+ The size of the internal buffer. (default: 256*1024)
+ Useful if serialisation size can be correctly estimated,
+ avoid unnecessary reallocations.
"""
cdef msgpack_packer pk
cdef object _default
- cdef object _bencoding
cdef object _berrors
- cdef char *encoding
- cdef char *unicode_errors
- cdef bool use_float
+ cdef const char *unicode_errors
+ cdef size_t exports # number of exported buffers
+ cdef bint strict_types
+ cdef bint use_float
cdef bint autoreset
+ cdef bint datetime
- def __cinit__(self):
- cdef int buf_size = 1024*1024
- self.pk.buf = malloc(buf_size);
+ def __cinit__(self, buf_size=256*1024, **_kwargs):
+ self.pk.buf = PyMem_Malloc(buf_size)
if self.pk.buf == NULL:
raise MemoryError("Unable to allocate internal buffer.")
self.pk.buf_size = buf_size
self.pk.length = 0
+ self.exports = 0
- def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', use_single_float=False, bint autoreset=1):
- """
- """
+ def __dealloc__(self):
+ PyMem_Free(self.pk.buf)
+ self.pk.buf = NULL
+ assert self.exports == 0
+
+ cdef _check_exports(self):
+ if self.exports > 0:
+ raise BufferError("Existing exports of data: Packer cannot be changed")
+
+ def __init__(self, *, default=None,
+ bint use_single_float=False, bint autoreset=True, bint use_bin_type=True,
+ bint strict_types=False, bint datetime=False, unicode_errors=None,
+ buf_size=256*1024):
self.use_float = use_single_float
+ self.strict_types = strict_types
self.autoreset = autoreset
+ self.datetime = datetime
+ self.pk.use_bin_type = use_bin_type
if default is not None:
if not PyCallable_Check(default):
raise TypeError("default must be a callable.")
self._default = default
- if encoding is None:
- self.encoding = NULL
+
+ self._berrors = unicode_errors
+ if unicode_errors is None:
self.unicode_errors = NULL
else:
- if isinstance(encoding, unicode):
- self._bencoding = encoding.encode('ascii')
- else:
- self._bencoding = encoding
- self.encoding = PyBytes_AsString(self._bencoding)
- if isinstance(unicode_errors, unicode):
- self._berrors = unicode_errors.encode('ascii')
- else:
- self._berrors = unicode_errors
- self.unicode_errors = PyBytes_AsString(self._berrors)
+ self.unicode_errors = self._berrors
- def __dealloc__(self):
- free(self.pk.buf);
-
- cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1:
+ # returns -2 when default should(o) be called
+ cdef int _pack_inner(self, object o, bint will_default, int nest_limit) except -1:
cdef long long llval
cdef unsigned long long ullval
- cdef long longval
- cdef float fval
- cdef double dval
- cdef char* rawval
- cdef int ret
- cdef dict d
-
- if nest_limit < 0:
- raise PackValueError("recursion limit exceeded.")
+ cdef unsigned long ulval
+ cdef const char* rawval
+ cdef Py_ssize_t L
+ cdef Py_buffer view
+ cdef bint strict = self.strict_types
if o is None:
- ret = msgpack_pack_nil(&self.pk)
- elif isinstance(o, bool):
- if o:
- ret = msgpack_pack_true(&self.pk)
- else:
- ret = msgpack_pack_false(&self.pk)
- elif PyLong_Check(o):
- if o > 0:
- ullval = o
- ret = msgpack_pack_unsigned_long_long(&self.pk, ullval)
- else:
- llval = o
- ret = msgpack_pack_long_long(&self.pk, llval)
- elif PyInt_Check(o):
- longval = o
- ret = msgpack_pack_long(&self.pk, longval)
- elif PyFloat_Check(o):
+ msgpack_pack_nil(&self.pk)
+ elif o is True:
+ msgpack_pack_true(&self.pk)
+ elif o is False:
+ msgpack_pack_false(&self.pk)
+ elif PyLong_CheckExact(o) if strict else PyLong_Check(o):
+ try:
+ if o > 0:
+ ullval = o
+ msgpack_pack_unsigned_long_long(&self.pk, ullval)
+ else:
+ llval = o
+ msgpack_pack_long_long(&self.pk, llval)
+ except OverflowError as oe:
+ if will_default:
+ return -2
+ else:
+ raise OverflowError("Integer value out of range")
+ elif PyFloat_CheckExact(o) if strict else PyFloat_Check(o):
if self.use_float:
- fval = o
- ret = msgpack_pack_float(&self.pk, fval)
+ msgpack_pack_float(&self.pk, o)
else:
- dval = o
- ret = msgpack_pack_double(&self.pk, dval)
- elif PyBytes_Check(o):
- rawval = o
- ret = msgpack_pack_raw(&self.pk, len(o))
- if ret == 0:
- ret = msgpack_pack_raw_body(&self.pk, rawval, len(o))
- elif PyUnicode_Check(o):
- if not self.encoding:
- raise TypeError("Can't encode unicode string: no encoding is specified")
- o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
+ msgpack_pack_double(&self.pk, o)
+ elif PyBytesLike_CheckExact(o) if strict else PyBytesLike_Check(o):
+ L = Py_SIZE(o)
+ if L > ITEM_LIMIT:
+ PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(o).tp_name)
rawval = o
- ret = msgpack_pack_raw(&self.pk, len(o))
- if ret == 0:
- ret = msgpack_pack_raw_body(&self.pk, rawval, len(o))
- elif PyDict_CheckExact(o):
- d = o
- ret = msgpack_pack_map(&self.pk, len(d))
- if ret == 0:
- for k, v in d.iteritems():
- ret = self._pack(k, nest_limit-1)
- if ret != 0: break
- ret = self._pack(v, nest_limit-1)
- if ret != 0: break
- elif PyDict_Check(o):
- ret = msgpack_pack_map(&self.pk, len(o))
- if ret == 0:
- for k, v in o.items():
- ret = self._pack(k, nest_limit-1)
- if ret != 0: break
- ret = self._pack(v, nest_limit-1)
- if ret != 0: break
- elif PyTuple_Check(o) or PyList_Check(o):
- ret = msgpack_pack_array(&self.pk, len(o))
- if ret == 0:
- for v in o:
- ret = self._pack(v, nest_limit-1)
- if ret != 0: break
- elif self._default:
- o = self._default(o)
- ret = self._pack(o, nest_limit-1)
+ msgpack_pack_bin(&self.pk, L)
+ msgpack_pack_raw_body(&self.pk, rawval, L)
+ elif PyUnicode_CheckExact(o) if strict else PyUnicode_Check(o):
+ if self.unicode_errors == NULL:
+ rawval = PyUnicode_AsUTF8AndSize(o, &L)
+ if L >ITEM_LIMIT:
+ raise ValueError("unicode string is too large")
+ else:
+ o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors)
+ L = Py_SIZE(o)
+ if L > ITEM_LIMIT:
+ raise ValueError("unicode string is too large")
+ rawval = o
+ msgpack_pack_raw(&self.pk, L)
+ msgpack_pack_raw_body(&self.pk, rawval, L)
+ elif PyDict_CheckExact(o) if strict else PyDict_Check(o):
+ L = len(o)
+ if L > ITEM_LIMIT:
+ raise ValueError("dict is too large")
+ msgpack_pack_map(&self.pk, L)
+ for k, v in o.items():
+ self._pack(k, nest_limit)
+ self._pack(v, nest_limit)
+ elif type(o) is ExtType if strict else isinstance(o, ExtType):
+ # This should be before Tuple because ExtType is namedtuple.
+ rawval = o.data
+ L = len(o.data)
+ if L > ITEM_LIMIT:
+ raise ValueError("EXT data is too large")
+ msgpack_pack_ext(&self.pk, o.code, L)
+ msgpack_pack_raw_body(&self.pk, rawval, L)
+ elif type(o) is Timestamp:
+ llval = o.seconds
+ ulval = o.nanoseconds
+ msgpack_pack_timestamp(&self.pk, llval, ulval)
+ elif PyList_CheckExact(o) if strict else (PyTuple_Check(o) or PyList_Check(o)):
+ L = Py_SIZE(o)
+ if L > ITEM_LIMIT:
+ raise ValueError("list is too large")
+ msgpack_pack_array(&self.pk, L)
+ for v in o:
+ self._pack(v, nest_limit)
+ elif PyMemoryView_Check(o):
+ PyObject_GetBuffer(o, &view, PyBUF_SIMPLE)
+ L = view.len
+ if L > ITEM_LIMIT:
+ PyBuffer_Release(&view);
+ raise ValueError("memoryview is too large")
+ try:
+ msgpack_pack_bin(&self.pk, L)
+ msgpack_pack_raw_body(&self.pk, view.buf, L)
+ finally:
+ PyBuffer_Release(&view);
+ elif self.datetime and PyDateTime_CheckExact(o) and datetime_tzinfo(o) is not None:
+ delta = o - epoch
+ if not PyDelta_CheckExact(delta):
+ raise ValueError("failed to calculate delta")
+ llval = timedelta_days(delta) * (24*60*60) + timedelta_seconds(delta)
+ ulval = timedelta_microseconds(delta) * 1000
+ msgpack_pack_timestamp(&self.pk, llval, ulval)
+ elif will_default:
+ return -2
+ elif self.datetime and PyDateTime_CheckExact(o):
+ # this should be later than will_default
+ PyErr_Format(ValueError, b"can not serialize '%.200s' object where tzinfo=None", Py_TYPE(o).tp_name)
else:
- raise TypeError("can't serialize %r" % (o,))
- return ret
+ PyErr_Format(TypeError, b"can not serialize '%.200s' object", Py_TYPE(o).tp_name)
+
+ cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1:
+ cdef int ret
+ if nest_limit < 0:
+ raise ValueError("recursion limit exceeded.")
+ nest_limit -= 1
+ if self._default is not None:
+ ret = self._pack_inner(o, 1, nest_limit)
+ if ret == -2:
+ o = self._default(o)
+ else:
+ return ret
+ return self._pack_inner(o, 0, nest_limit)
- cpdef pack(self, object obj):
+ def pack(self, object obj):
cdef int ret
- ret = self._pack(obj, DEFAULT_RECURSE_LIMIT)
- if ret == -1:
- raise MemoryError
- elif ret: # should not happen.
- raise TypeError
+ self._check_exports()
+ try:
+ ret = self._pack(obj, DEFAULT_RECURSE_LIMIT)
+ except:
+ self.pk.length = 0
+ raise
+ if ret: # should not happen.
+ raise RuntimeError("internal error")
if self.autoreset:
buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
self.pk.length = 0
return buf
- def pack_array_header(self, size_t size):
- cdef int ret = msgpack_pack_array(&self.pk, size)
- if ret == -1:
- raise MemoryError
- elif ret: # should not happen
- raise TypeError
+ def pack_ext_type(self, typecode, data):
+ self._check_exports()
+ if len(data) > ITEM_LIMIT:
+ raise ValueError("ext data too large")
+ msgpack_pack_ext(&self.pk, typecode, len(data))
+ msgpack_pack_raw_body(&self.pk, data, len(data))
+
+ def pack_array_header(self, long long size):
+ self._check_exports()
+ if size > ITEM_LIMIT:
+ raise ValueError("array too large")
+ msgpack_pack_array(&self.pk, size)
if self.autoreset:
buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
self.pk.length = 0
return buf
- def pack_map_header(self, size_t size):
- cdef int ret = msgpack_pack_map(&self.pk, size)
- if ret == -1:
- raise MemoryError
- elif ret: # should not happen
- raise TypeError
+ def pack_map_header(self, long long size):
+ self._check_exports()
+ if size > ITEM_LIMIT:
+ raise ValueError("map too learge")
+ msgpack_pack_map(&self.pk, size)
if self.autoreset:
buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
self.pk.length = 0
@@ -225,49 +315,44 @@ cdef class Packer(object):
"""
Pack *pairs* as msgpack map type.
- *pairs* should sequence of pair.
+ *pairs* should be a sequence of pairs.
(`len(pairs)` and `for k, v in pairs:` should be supported.)
"""
- cdef int ret = msgpack_pack_map(&self.pk, len(pairs))
- if ret == 0:
- for k, v in pairs:
- ret = self._pack(k)
- if ret != 0: break
- ret = self._pack(v)
- if ret != 0: break
- if ret == -1:
- raise MemoryError
- elif ret: # should not happen
- raise TypeError
+ self._check_exports()
+ size = len(pairs)
+ if size > ITEM_LIMIT:
+ raise ValueError("map too large")
+ msgpack_pack_map(&self.pk, size)
+ for k, v in pairs:
+ self._pack(k)
+ self._pack(v)
if self.autoreset:
buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
self.pk.length = 0
return buf
def reset(self):
- """Clear internal buffer."""
+ """Reset internal buffer.
+
+ This method is useful only when autoreset=False.
+ """
+ self._check_exports()
self.pk.length = 0
def bytes(self):
- """Return buffer content."""
+ """Return internal buffer contents as bytes object"""
return PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
+ def getbuffer(self):
+ """Return memoryview of internal buffer.
-def pack(object o, object stream, default=None, str encoding='utf-8', str unicode_errors='strict'):
- """
- pack an object `o` and write it to stream)
-
- See :class:`Packer` for options.
- """
- packer = Packer(default=default, encoding=encoding, unicode_errors=unicode_errors)
- stream.write(packer.pack(o))
+ Note: Packer now supports buffer protocol. You can use memoryview(packer).
+ """
+ return memoryview(self)
-def packb(object o, default=None, encoding='utf-8', str unicode_errors='strict', bint use_single_float=False):
- """
- pack o and return packed bytes
+ def __getbuffer__(self, Py_buffer *buffer, int flags):
+ PyBuffer_FillInfo(buffer, self, self.pk.buf, self.pk.length, 1, flags)
+ self.exports += 1
- See :class:`Packer` for options.
- """
- packer = Packer(default=default, encoding=encoding, unicode_errors=unicode_errors,
- use_single_float=use_single_float)
- return packer.pack(o)
+ def __releasebuffer__(self, Py_buffer *buffer):
+ self.exports -= 1
diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx
index 5813dc94..34ff3304 100644
--- a/msgpack/_unpacker.pyx
+++ b/msgpack/_unpacker.pyx
@@ -1,60 +1,84 @@
# coding: utf-8
-#cython: embedsignature=True
from cpython cimport *
cdef extern from "Python.h":
- ctypedef char* const_char_ptr "const char*"
- ctypedef char* const_void_ptr "const void*"
ctypedef struct PyObject
- cdef int PyObject_AsReadBuffer(object o, const_void_ptr* buff, Py_ssize_t* buf_len) except -1
+ object PyMemoryView_GetContiguous(object obj, int buffertype, char order)
from libc.stdlib cimport *
from libc.string cimport *
from libc.limits cimport *
+from libc.stdint cimport uint64_t
-from msgpack.exceptions import (
- BufferFull,
- OutOfData,
- UnpackValueError,
- ExtraData,
- )
+from .exceptions import (
+ BufferFull,
+ OutOfData,
+ ExtraData,
+ FormatError,
+ StackError,
+)
+from .ext import ExtType, Timestamp
+cdef object giga = 1_000_000_000
cdef extern from "unpack.h":
ctypedef struct msgpack_user:
bint use_list
- PyObject* object_hook
+ bint raw
bint has_pairs_hook # call object_hook with k-v pairs
+ bint strict_map_key
+ int timestamp
+ PyObject* object_hook
PyObject* list_hook
- char *encoding
- char *unicode_errors
+ PyObject* ext_hook
+ PyObject* timestamp_t
+ PyObject *giga;
+ PyObject *utc;
+ const char *unicode_errors
+ Py_ssize_t max_str_len
+ Py_ssize_t max_bin_len
+ Py_ssize_t max_array_len
+ Py_ssize_t max_map_len
+ Py_ssize_t max_ext_len
ctypedef struct unpack_context:
msgpack_user user
PyObject* obj
- size_t count
- unsigned int ct
- PyObject* key
+ Py_ssize_t count
- ctypedef int (*execute_fn)(unpack_context* ctx, const_char_ptr data,
- size_t len, size_t* off) except? -1
+ ctypedef int (*execute_fn)(unpack_context* ctx, const char* data,
+ Py_ssize_t len, Py_ssize_t* off) except? -1
execute_fn unpack_construct
execute_fn unpack_skip
execute_fn read_array_header
execute_fn read_map_header
void unpack_init(unpack_context* ctx)
object unpack_data(unpack_context* ctx)
+ void unpack_clear(unpack_context* ctx)
cdef inline init_ctx(unpack_context *ctx,
- object object_hook, object object_pairs_hook, object list_hook,
- bint use_list, char* encoding, char* unicode_errors):
+ object object_hook, object object_pairs_hook,
+ object list_hook, object ext_hook,
+ bint use_list, bint raw, int timestamp,
+ bint strict_map_key,
+ const char* unicode_errors,
+ Py_ssize_t max_str_len, Py_ssize_t max_bin_len,
+ Py_ssize_t max_array_len, Py_ssize_t max_map_len,
+ Py_ssize_t max_ext_len):
unpack_init(ctx)
ctx.user.use_list = use_list
+ ctx.user.raw = raw
+ ctx.user.strict_map_key = strict_map_key
ctx.user.object_hook = ctx.user.list_hook = NULL
+ ctx.user.max_str_len = max_str_len
+ ctx.user.max_bin_len = max_bin_len
+ ctx.user.max_array_len = max_array_len
+ ctx.user.max_map_len = max_map_len
+ ctx.user.max_ext_len = max_ext_len
if object_hook is not None and object_pairs_hook is not None:
- raise ValueError("object_pairs_hook and object_hook are mutually exclusive.")
+ raise TypeError("object_pairs_hook and object_hook are mutually exclusive.")
if object_hook is not None:
if not PyCallable_Check(object_hook):
@@ -74,116 +98,197 @@ cdef inline init_ctx(unpack_context *ctx,
raise TypeError("list_hook must be a callable.")
ctx.user.list_hook = list_hook
- ctx.user.encoding = encoding
+ if ext_hook is not None:
+ if not PyCallable_Check(ext_hook):
+ raise TypeError("ext_hook must be a callable.")
+ ctx.user.ext_hook = ext_hook
+
+ if timestamp < 0 or 3 < timestamp:
+ raise ValueError("timestamp must be 0..3")
+
+ # Add Timestamp type to the user object so it may be used in unpack.h
+ ctx.user.timestamp = timestamp
+ ctx.user.timestamp_t = Timestamp
+ ctx.user.giga = giga
+ ctx.user.utc = utc
ctx.user.unicode_errors = unicode_errors
-def unpackb(object packed, object object_hook=None, object list_hook=None,
- bint use_list=1, encoding=None, unicode_errors="strict",
- object_pairs_hook=None,
- ):
+def default_read_extended_type(typecode, data):
+ raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode)
+
+cdef inline int get_data_from_buffer(object obj,
+ Py_buffer *view,
+ char **buf,
+ Py_ssize_t *buffer_len) except 0:
+ cdef object contiguous
+ cdef Py_buffer tmp
+ if PyObject_GetBuffer(obj, view, PyBUF_FULL_RO) == -1:
+ raise
+ if view.itemsize != 1:
+ PyBuffer_Release(view)
+ raise BufferError("cannot unpack from multi-byte object")
+ if PyBuffer_IsContiguous(view, b'A') == 0:
+ PyBuffer_Release(view)
+ # create a contiguous copy and get buffer
+ contiguous = PyMemoryView_GetContiguous(obj, PyBUF_READ, b'C')
+ PyObject_GetBuffer(contiguous, view, PyBUF_SIMPLE)
+ # view must hold the only reference to contiguous,
+ # so memory is freed when view is released
+ Py_DECREF(contiguous)
+ buffer_len[0] = view.len
+ buf[0] = view.buf
+ return 1
+
+
+def unpackb(object packed, *, object object_hook=None, object list_hook=None,
+ bint use_list=True, bint raw=False, int timestamp=0, bint strict_map_key=True,
+ unicode_errors=None,
+ object_pairs_hook=None, ext_hook=ExtType,
+ Py_ssize_t max_str_len=-1,
+ Py_ssize_t max_bin_len=-1,
+ Py_ssize_t max_array_len=-1,
+ Py_ssize_t max_map_len=-1,
+ Py_ssize_t max_ext_len=-1):
"""
Unpack packed_bytes to object. Returns an unpacked object.
- Raises `ValueError` when `packed` contains extra bytes.
+ Raises ``ExtraData`` when *packed* contains extra bytes.
+ Raises ``ValueError`` when *packed* is incomplete.
+ Raises ``FormatError`` when *packed* is not valid msgpack.
+ Raises ``StackError`` when *packed* contains too deeply nested data.
+ Other exceptions can be raised during unpacking.
See :class:`Unpacker` for options.
+
+ *max_xxx_len* options are configured automatically from ``len(packed)``.
"""
cdef unpack_context ctx
- cdef size_t off = 0
+ cdef Py_ssize_t off = 0
cdef int ret
- cdef char* buf
+ cdef Py_buffer view
+ cdef char* buf = NULL
cdef Py_ssize_t buf_len
- cdef char* cenc = NULL
- cdef char* cerr = NULL
-
- PyObject_AsReadBuffer(packed, &buf, &buf_len)
-
- if encoding is not None:
- if isinstance(encoding, unicode):
- encoding = encoding.encode('ascii')
- cenc = PyBytes_AsString(encoding)
+ cdef const char* cerr = NULL
if unicode_errors is not None:
- if isinstance(unicode_errors, unicode):
- unicode_errors = unicode_errors.encode('ascii')
- cerr = PyBytes_AsString(unicode_errors)
+ cerr = unicode_errors
+
+ get_data_from_buffer(packed, &view, &buf, &buf_len)
+
+ if max_str_len == -1:
+ max_str_len = buf_len
+ if max_bin_len == -1:
+ max_bin_len = buf_len
+ if max_array_len == -1:
+ max_array_len = buf_len
+ if max_map_len == -1:
+ max_map_len = buf_len//2
+ if max_ext_len == -1:
+ max_ext_len = buf_len
+
+ try:
+ init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
+ use_list, raw, timestamp, strict_map_key, cerr,
+ max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len)
+ ret = unpack_construct(&ctx, buf, buf_len, &off)
+ finally:
+ PyBuffer_Release(&view);
- init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr)
- ret = unpack_construct(&ctx, buf, buf_len, &off)
if ret == 1:
obj = unpack_data(&ctx)
if off < buf_len:
raise ExtraData(obj, PyBytes_FromStringAndSize(buf+off, buf_len-off))
return obj
- else:
- raise UnpackValueError("Unpack failed: error = %d" % (ret,))
+ unpack_clear(&ctx)
+ if ret == 0:
+ raise ValueError("Unpack failed: incomplete input")
+ elif ret == -2:
+ raise FormatError
+ elif ret == -3:
+ raise StackError
+ raise ValueError("Unpack failed: error = %d" % (ret,))
-def unpack(object stream, object object_hook=None, object list_hook=None,
- bint use_list=1, encoding=None, unicode_errors="strict",
- object_pairs_hook=None,
- ):
- """
- Unpack an object from `stream`.
+cdef class Unpacker:
+ """Streaming unpacker.
- Raises `ValueError` when `stream` has extra bytes.
-
- See :class:`Unpacker` for options.
- """
- return unpackb(stream.read(), use_list=use_list,
- object_hook=object_hook, object_pairs_hook=object_pairs_hook, list_hook=list_hook,
- encoding=encoding, unicode_errors=unicode_errors,
- )
-
-
-cdef class Unpacker(object):
- """
- Streaming unpacker.
-
- arguments:
+ Arguments:
:param file_like:
File-like object having `.read(n)` method.
- If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable.
+ If specified, unpacker reads serialized data from it and `.feed()` is not usable.
:param int read_size:
- Used as `file_like.read(read_size)`. (default: `min(1024**2, max_buffer_size)`)
+ Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`)
:param bool use_list:
If true, unpack msgpack array to Python list.
Otherwise, unpack to Python tuple. (default: True)
- :param callable object_hook:
+ :param bool raw:
+ If true, unpack msgpack raw to Python bytes.
+ Otherwise, unpack to Python str by decoding with UTF-8 encoding (default).
+
+ :param int timestamp:
+ Control how timestamp type is unpacked:
+
+ 0 - Timestamp
+ 1 - float (Seconds from the EPOCH)
+ 2 - int (Nanoseconds from the EPOCH)
+ 3 - datetime.datetime (UTC).
+
+ :param bool strict_map_key:
+ If true (default), only str or bytes are accepted for map (dict) keys.
+
+ :param object_hook:
When specified, it should be callable.
Unpacker calls it with a dict argument after unpacking msgpack map.
(See also simplejson)
- :param callable object_pairs_hook:
+ :param object_pairs_hook:
When specified, it should be callable.
Unpacker calls it with a list of key-value pairs after unpacking msgpack map.
(See also simplejson)
- :param str encoding:
- Encoding used for decoding msgpack raw.
- If it is None (default), msgpack raw is deserialized to Python bytes.
-
:param str unicode_errors:
- Used for decoding msgpack raw with *encoding*.
- (default: `'strict'`)
+ The error handler for decoding unicode. (default: 'strict')
+ This option should be used only when you have msgpack data which
+ contains invalid UTF-8 string.
:param int max_buffer_size:
- Limits size of data waiting unpacked. 0 means system's INT_MAX (default).
+ Limits size of data waiting unpacked. 0 means 2**32-1.
+ The default value is 100*1024*1024 (100MiB).
Raises `BufferFull` exception when it is insufficient.
- You shoud set this parameter when unpacking data from untrasted source.
+ You should set this parameter when unpacking data from untrusted source.
+
+ :param int max_str_len:
+ Deprecated, use *max_buffer_size* instead.
+ Limits max length of str. (default: max_buffer_size)
- example of streaming deserialize from file-like object::
+ :param int max_bin_len:
+ Deprecated, use *max_buffer_size* instead.
+ Limits max length of bin. (default: max_buffer_size)
+
+ :param int max_array_len:
+ Limits max length of array.
+ (default: max_buffer_size)
+
+ :param int max_map_len:
+ Limits max length of map.
+ (default: max_buffer_size//2)
+
+ :param int max_ext_len:
+ Deprecated, use *max_buffer_size* instead.
+ Limits max size of ext type. (default: max_buffer_size)
+
+ Example of streaming deserialize from file-like object::
unpacker = Unpacker(file_like)
for o in unpacker:
process(o)
- example of streaming deserialize from socket::
+ Example of streaming deserialize from socket::
unpacker = Unpacker()
while True:
@@ -193,84 +298,116 @@ cdef class Unpacker(object):
unpacker.feed(buf)
for o in unpacker:
process(o)
+
+ Raises ``ExtraData`` when *packed* contains extra bytes.
+ Raises ``OutOfData`` when *packed* is incomplete.
+ Raises ``FormatError`` when *packed* is not valid msgpack.
+ Raises ``StackError`` when *packed* contains too deeply nested data.
+ Other exceptions can be raised during unpacking.
"""
cdef unpack_context ctx
cdef char* buf
- cdef size_t buf_size, buf_head, buf_tail
+ cdef Py_ssize_t buf_size, buf_head, buf_tail
cdef object file_like
cdef object file_like_read
cdef Py_ssize_t read_size
- cdef object object_hook
- cdef object encoding, unicode_errors
- cdef size_t max_buffer_size
+ # To maintain refcnt.
+ cdef object object_hook, object_pairs_hook, list_hook, ext_hook
+ cdef object unicode_errors
+ cdef Py_ssize_t max_buffer_size
+ cdef uint64_t stream_offset
def __cinit__(self):
self.buf = NULL
def __dealloc__(self):
- free(self.buf)
+ PyMem_Free(self.buf)
self.buf = NULL
- def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1,
+ def __init__(self, file_like=None, *, Py_ssize_t read_size=0,
+ bint use_list=True, bint raw=False, int timestamp=0, bint strict_map_key=True,
object object_hook=None, object object_pairs_hook=None, object list_hook=None,
- str encoding=None, str unicode_errors='strict', int max_buffer_size=0,
- ):
- cdef char *cenc=NULL, *cerr=NULL
+ unicode_errors=None, Py_ssize_t max_buffer_size=100*1024*1024,
+ object ext_hook=ExtType,
+ Py_ssize_t max_str_len=-1,
+ Py_ssize_t max_bin_len=-1,
+ Py_ssize_t max_array_len=-1,
+ Py_ssize_t max_map_len=-1,
+ Py_ssize_t max_ext_len=-1):
+ cdef const char *cerr=NULL
+
+ self.object_hook = object_hook
+ self.object_pairs_hook = object_pairs_hook
+ self.list_hook = list_hook
+ self.ext_hook = ext_hook
self.file_like = file_like
if file_like:
self.file_like_read = file_like.read
if not PyCallable_Check(self.file_like_read):
- raise ValueError("`file_like.read` must be a callable.")
+ raise TypeError("`file_like.read` must be a callable.")
+
if not max_buffer_size:
max_buffer_size = INT_MAX
+ if max_str_len == -1:
+ max_str_len = max_buffer_size
+ if max_bin_len == -1:
+ max_bin_len = max_buffer_size
+ if max_array_len == -1:
+ max_array_len = max_buffer_size
+ if max_map_len == -1:
+ max_map_len = max_buffer_size//2
+ if max_ext_len == -1:
+ max_ext_len = max_buffer_size
+
if read_size > max_buffer_size:
raise ValueError("read_size should be less or equal to max_buffer_size")
if not read_size:
read_size = min(max_buffer_size, 1024**2)
+
self.max_buffer_size = max_buffer_size
self.read_size = read_size
- self.buf = malloc(read_size)
+ self.buf = PyMem_Malloc(read_size)
if self.buf == NULL:
raise MemoryError("Unable to allocate internal buffer.")
self.buf_size = read_size
self.buf_head = 0
self.buf_tail = 0
-
- if encoding is not None:
- if isinstance(encoding, unicode):
- self.encoding = encoding.encode('ascii')
- else:
- self.encoding = encoding
- cenc = PyBytes_AsString(self.encoding)
+ self.stream_offset = 0
if unicode_errors is not None:
- if isinstance(unicode_errors, unicode):
- self.unicode_errors = unicode_errors.encode('ascii')
- else:
- self.unicode_errors = unicode_errors
- cerr = PyBytes_AsString(self.unicode_errors)
+ self.unicode_errors = unicode_errors
+ cerr = unicode_errors
- init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr)
+ init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
+ ext_hook, use_list, raw, timestamp, strict_map_key, cerr,
+ max_str_len, max_bin_len, max_array_len,
+ max_map_len, max_ext_len)
def feed(self, object next_bytes):
"""Append `next_bytes` to internal buffer."""
+ cdef Py_buffer pybuff
cdef char* buf
cdef Py_ssize_t buf_len
+
if self.file_like is not None:
raise AssertionError(
"unpacker.feed() is not be able to use with `file_like`.")
- PyObject_AsReadBuffer(next_bytes, &buf, &buf_len)
- self.append_buffer(buf, buf_len)
+
+ get_data_from_buffer(next_bytes, &pybuff, &buf, &buf_len)
+ try:
+ self.append_buffer(buf, buf_len)
+ finally:
+ PyBuffer_Release(&pybuff)
cdef append_buffer(self, void* _buf, Py_ssize_t _buf_len):
cdef:
char* buf = self.buf
char* new_buf
- size_t head = self.buf_head
- size_t tail = self.buf_tail
- size_t buf_size = self.buf_size
- size_t new_size
+ Py_ssize_t head = self.buf_head
+ Py_ssize_t tail = self.buf_tail
+ Py_ssize_t buf_size = self.buf_size
+ Py_ssize_t new_size
if tail + _buf_len > buf_size:
if ((tail - head) + _buf_len) <= buf_size:
@@ -284,13 +421,13 @@ cdef class Unpacker(object):
if new_size > self.max_buffer_size:
raise BufferFull
new_size = min(new_size*2, self.max_buffer_size)
- new_buf = malloc(new_size)
+ new_buf = PyMem_Malloc(new_size)
if new_buf == NULL:
# self.buf still holds old buffer and will be freed during
# obj destruction
raise MemoryError("Unable to enlarge internal buffer.")
memcpy(new_buf, buf + head, tail - head)
- free(buf)
+ PyMem_Free(buf)
buf = new_buf
buf_size = new_size
@@ -303,25 +440,30 @@ cdef class Unpacker(object):
self.buf_size = buf_size
self.buf_tail = tail + _buf_len
- cdef read_from_file(self):
- next_bytes = self.file_like_read(
- min(self.read_size,
- self.max_buffer_size - (self.buf_tail - self.buf_head)
- ))
+ cdef int read_from_file(self) except -1:
+ cdef Py_ssize_t remains = self.max_buffer_size - (self.buf_tail - self.buf_head)
+ if remains <= 0:
+ raise BufferFull
+
+ next_bytes = self.file_like_read(min(self.read_size, remains))
if next_bytes:
self.append_buffer(PyBytes_AsString(next_bytes), PyBytes_Size(next_bytes))
else:
self.file_like = None
+ return 0
- cdef object _unpack(self, execute_fn execute, object write_bytes, bint iter=0):
+ cdef object _unpack(self, execute_fn execute, bint iter=0):
cdef int ret
cdef object obj
- cdef size_t prev_head
+ cdef Py_ssize_t prev_head
+
while 1:
prev_head = self.buf_head
- ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head)
- if write_bytes is not None:
- write_bytes(PyBytes_FromStringAndSize(self.buf + prev_head, self.buf_head - prev_head))
+ if prev_head < self.buf_tail:
+ ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head)
+ self.stream_offset += self.buf_head - prev_head
+ else:
+ ret = 0
if ret == 1:
obj = unpack_data(&self.ctx)
@@ -335,62 +477,67 @@ cdef class Unpacker(object):
raise StopIteration("No more data to unpack.")
else:
raise OutOfData("No more data to unpack.")
+ elif ret == -2:
+ raise FormatError
+ elif ret == -3:
+ raise StackError
else:
raise ValueError("Unpack failed: error = %d" % (ret,))
def read_bytes(self, Py_ssize_t nbytes):
- """read a specified number of raw bytes from the stream"""
- cdef size_t nread
+ """Read a specified number of raw bytes from the stream"""
+ cdef Py_ssize_t nread
nread = min(self.buf_tail - self.buf_head, nbytes)
ret = PyBytes_FromStringAndSize(self.buf + self.buf_head, nread)
self.buf_head += nread
- if len(ret) < nbytes and self.file_like is not None:
- ret += self.file_like.read(nbytes - len(ret))
+ if nread < nbytes and self.file_like is not None:
+ ret += self.file_like.read(nbytes - nread)
+ nread = len(ret)
+ self.stream_offset += nread
return ret
- def unpack(self, object write_bytes=None):
- """
- unpack one object
-
- If write_bytes is not None, it will be called with parts of the raw
- message as it is unpacked.
+ def unpack(self):
+ """Unpack one object
Raises `OutOfData` when there are no more bytes to unpack.
"""
- return self._unpack(unpack_construct, write_bytes)
-
- def skip(self, object write_bytes=None):
- """
- read and ignore one object, returning None
+ return self._unpack(unpack_construct)
- If write_bytes is not None, it will be called with parts of the raw
- message as it is unpacked.
+ def skip(self):
+ """Read and ignore one object, returning None
Raises `OutOfData` when there are no more bytes to unpack.
"""
- return self._unpack(unpack_skip, write_bytes)
+ return self._unpack(unpack_skip)
- def read_array_header(self, object write_bytes=None):
+ def read_array_header(self):
"""assuming the next object is an array, return its size n, such that
the next n unpack() calls will iterate over its contents.
Raises `OutOfData` when there are no more bytes to unpack.
"""
- return self._unpack(read_array_header, write_bytes)
+ return self._unpack(read_array_header)
- def read_map_header(self, object write_bytes=None):
+ def read_map_header(self):
"""assuming the next object is a map, return its size n, such that the
next n * 2 unpack() calls will iterate over its key-value pairs.
Raises `OutOfData` when there are no more bytes to unpack.
"""
- return self._unpack(read_map_header, write_bytes)
+ return self._unpack(read_map_header)
+
+ def tell(self):
+ """Returns the current position of the Unpacker in bytes, i.e., the
+ number of bytes that were read from the input, also the starting
+ position of the next object.
+ """
+ return self.stream_offset
def __iter__(self):
return self
def __next__(self):
- return self._unpack(unpack_construct, None, 1)
+ return self._unpack(unpack_construct, 1)
# for debug.
#def _buf(self):
diff --git a/msgpack/_version.py b/msgpack/_version.py
deleted file mode 100644
index 5999edeb..00000000
--- a/msgpack/_version.py
+++ /dev/null
@@ -1 +0,0 @@
-version = (0, 3, 0)
diff --git a/msgpack/exceptions.py b/msgpack/exceptions.py
index 25655416..d6d2615c 100644
--- a/msgpack/exceptions.py
+++ b/msgpack/exceptions.py
@@ -1,5 +1,10 @@
class UnpackException(Exception):
- pass
+ """Base class for some exceptions raised while unpacking.
+
+ NOTE: unpack may raise exceptions other than subclasses of
+ UnpackException. If you want to catch all errors, catch
+ Exception instead.
+ """
class BufferFull(UnpackException):
@@ -10,20 +15,34 @@ class OutOfData(UnpackException):
pass
-class UnpackValueError(UnpackException, ValueError):
- pass
+class FormatError(ValueError, UnpackException):
+ """Invalid msgpack format"""
+
+
+class StackError(ValueError, UnpackException):
+ """Too nested"""
+
+# Deprecated. Use ValueError instead
+UnpackValueError = ValueError
+
+
+class ExtraData(UnpackValueError):
+ """ExtraData is raised when there is trailing data.
+
+ This exception is raised only during one-shot (not streaming)
+ unpacking.
+ """
-class ExtraData(ValueError):
def __init__(self, unpacked, extra):
self.unpacked = unpacked
self.extra = extra
def __str__(self):
- return "unpack(b) recieved extra data."
+ return "unpack(b) received extra data."
-class PackException(Exception):
- pass
-class PackValueError(PackException, ValueError):
- pass
+# Deprecated. Use Exception instead to catch all exception during packing.
+PackException = Exception
+PackValueError = ValueError
+PackOverflowError = OverflowError
diff --git a/msgpack/ext.py b/msgpack/ext.py
new file mode 100644
index 00000000..9694819a
--- /dev/null
+++ b/msgpack/ext.py
@@ -0,0 +1,170 @@
+import datetime
+import struct
+from collections import namedtuple
+
+
+class ExtType(namedtuple("ExtType", "code data")):
+ """ExtType represents ext type in msgpack."""
+
+ def __new__(cls, code, data):
+ if not isinstance(code, int):
+ raise TypeError("code must be int")
+ if not isinstance(data, bytes):
+ raise TypeError("data must be bytes")
+ if not 0 <= code <= 127:
+ raise ValueError("code must be 0~127")
+ return super().__new__(cls, code, data)
+
+
+class Timestamp:
+ """Timestamp represents the Timestamp extension type in msgpack.
+
+ When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`.
+ When using pure-Python msgpack, :func:`to_bytes` and :func:`from_bytes` are used to pack and
+ unpack `Timestamp`.
+
+ This class is immutable: Do not override seconds and nanoseconds.
+ """
+
+ __slots__ = ["seconds", "nanoseconds"]
+
+ def __init__(self, seconds, nanoseconds=0):
+ """Initialize a Timestamp object.
+
+ :param int seconds:
+ Number of seconds since the UNIX epoch (00:00:00 UTC Jan 1 1970, minus leap seconds).
+ May be negative.
+
+ :param int nanoseconds:
+ Number of nanoseconds to add to `seconds` to get fractional time.
+ Maximum is 999_999_999. Default is 0.
+
+ Note: Negative times (before the UNIX epoch) are represented as neg. seconds + pos. ns.
+ """
+ if not isinstance(seconds, int):
+ raise TypeError("seconds must be an integer")
+ if not isinstance(nanoseconds, int):
+ raise TypeError("nanoseconds must be an integer")
+ if not (0 <= nanoseconds < 10**9):
+ raise ValueError("nanoseconds must be a non-negative integer less than 1000000000.")
+ self.seconds = seconds
+ self.nanoseconds = nanoseconds
+
+ def __repr__(self):
+ """String representation of Timestamp."""
+ return f"Timestamp(seconds={self.seconds}, nanoseconds={self.nanoseconds})"
+
+ def __eq__(self, other):
+ """Check for equality with another Timestamp object"""
+ if type(other) is self.__class__:
+ return self.seconds == other.seconds and self.nanoseconds == other.nanoseconds
+ return False
+
+ def __ne__(self, other):
+ """not-equals method (see :func:`__eq__()`)"""
+ return not self.__eq__(other)
+
+ def __hash__(self):
+ return hash((self.seconds, self.nanoseconds))
+
+ @staticmethod
+ def from_bytes(b):
+ """Unpack bytes into a `Timestamp` object.
+
+ Used for pure-Python msgpack unpacking.
+
+ :param b: Payload from msgpack ext message with code -1
+ :type b: bytes
+
+ :returns: Timestamp object unpacked from msgpack ext payload
+ :rtype: Timestamp
+ """
+ if len(b) == 4:
+ seconds = struct.unpack("!L", b)[0]
+ nanoseconds = 0
+ elif len(b) == 8:
+ data64 = struct.unpack("!Q", b)[0]
+ seconds = data64 & 0x00000003FFFFFFFF
+ nanoseconds = data64 >> 34
+ elif len(b) == 12:
+ nanoseconds, seconds = struct.unpack("!Iq", b)
+ else:
+ raise ValueError(
+ "Timestamp type can only be created from 32, 64, or 96-bit byte objects"
+ )
+ return Timestamp(seconds, nanoseconds)
+
+ def to_bytes(self):
+ """Pack this Timestamp object into bytes.
+
+ Used for pure-Python msgpack packing.
+
+ :returns data: Payload for EXT message with code -1 (timestamp type)
+ :rtype: bytes
+ """
+ if (self.seconds >> 34) == 0: # seconds is non-negative and fits in 34 bits
+ data64 = self.nanoseconds << 34 | self.seconds
+ if data64 & 0xFFFFFFFF00000000 == 0:
+ # nanoseconds is zero and seconds < 2**32, so timestamp 32
+ data = struct.pack("!L", data64)
+ else:
+ # timestamp 64
+ data = struct.pack("!Q", data64)
+ else:
+ # timestamp 96
+ data = struct.pack("!Iq", self.nanoseconds, self.seconds)
+ return data
+
+ @staticmethod
+ def from_unix(unix_sec):
+ """Create a Timestamp from posix timestamp in seconds.
+
+ :param unix_sec: Posix timestamp in seconds.
+ :type unix_sec: int or float
+ """
+ seconds = int(unix_sec // 1)
+ nanoseconds = int((unix_sec % 1) * 10**9)
+ return Timestamp(seconds, nanoseconds)
+
+ def to_unix(self):
+ """Get the timestamp as a floating-point value.
+
+ :returns: posix timestamp
+ :rtype: float
+ """
+ return self.seconds + self.nanoseconds / 1e9
+
+ @staticmethod
+ def from_unix_nano(unix_ns):
+ """Create a Timestamp from posix timestamp in nanoseconds.
+
+ :param int unix_ns: Posix timestamp in nanoseconds.
+ :rtype: Timestamp
+ """
+ return Timestamp(*divmod(unix_ns, 10**9))
+
+ def to_unix_nano(self):
+ """Get the timestamp as a unixtime in nanoseconds.
+
+ :returns: posix timestamp in nanoseconds
+ :rtype: int
+ """
+ return self.seconds * 10**9 + self.nanoseconds
+
+ def to_datetime(self):
+ """Get the timestamp as a UTC datetime.
+
+ :rtype: `datetime.datetime`
+ """
+ utc = datetime.timezone.utc
+ return datetime.datetime.fromtimestamp(0, utc) + datetime.timedelta(
+ seconds=self.seconds, microseconds=self.nanoseconds // 1000
+ )
+
+ @staticmethod
+ def from_datetime(dt):
+ """Create a Timestamp from datetime with tzinfo.
+
+ :rtype: Timestamp
+ """
+ return Timestamp(seconds=int(dt.timestamp()), nanoseconds=dt.microsecond * 1000)
diff --git a/msgpack/fallback.py b/msgpack/fallback.py
index f9a2f5e8..b02e47cf 100644
--- a/msgpack/fallback.py
+++ b/msgpack/fallback.py
@@ -1,432 +1,611 @@
"""Fallback pure Python implementation of msgpack"""
-import sys
-import array
import struct
+import sys
+from datetime import datetime as _DateTime
-if sys.version_info[0] == 3:
- PY3 = True
- int_types = int
- Unicode = str
- xrange = range
- def dict_iteritems(d):
- return d.items()
-else:
- PY3 = False
- int_types = (int, long)
- Unicode = unicode
- def dict_iteritems(d):
- return d.iteritems()
+if hasattr(sys, "pypy_version_info"):
+ from __pypy__ import newlist_hint
+ from __pypy__.builders import BytesBuilder
+ _USING_STRINGBUILDER = True
-if hasattr(sys, 'pypy_version_info'):
- # cStringIO is slow on PyPy, StringIO is faster. However: PyPy's own
- # StringBuilder is fastest.
- from __pypy__ import newlist_hint
- from __pypy__.builders import StringBuilder
- USING_STRINGBUILDER = True
- class StringIO(object):
- def __init__(self, s=b''):
+ class BytesIO:
+ def __init__(self, s=b""):
if s:
- self.builder = StringBuilder(len(s))
+ self.builder = BytesBuilder(len(s))
self.builder.append(s)
else:
- self.builder = StringBuilder()
+ self.builder = BytesBuilder()
+
def write(self, s):
+ if isinstance(s, memoryview):
+ s = s.tobytes()
+ elif isinstance(s, bytearray):
+ s = bytes(s)
self.builder.append(s)
+
def getvalue(self):
return self.builder.build()
+
else:
- USING_STRINGBUILDER = False
- from io import BytesIO as StringIO
- newlist_hint = lambda size: []
-
-from msgpack.exceptions import (
- BufferFull,
- OutOfData,
- UnpackValueError,
- PackValueError,
- ExtraData)
-
-EX_SKIP = 0
-EX_CONSTRUCT = 1
-EX_READ_ARRAY_HEADER = 2
-EX_READ_MAP_HEADER = 3
-
-TYPE_IMMEDIATE = 0
-TYPE_ARRAY = 1
-TYPE_MAP = 2
-TYPE_RAW = 3
-
-DEFAULT_RECURSE_LIMIT=511
-
-def pack(o, stream, **kwargs):
- """
- Pack object `o` and write it to `stream`
+ from io import BytesIO
- See :class:`Packer` for options.
- """
- packer = Packer(**kwargs)
- stream.write(packer.pack(o))
+ _USING_STRINGBUILDER = False
-def packb(o, **kwargs):
- """
- Pack object `o` and return packed bytes
+ def newlist_hint(size):
+ return []
- See :class:`Packer` for options.
- """
- return Packer(**kwargs).pack(o)
-def unpack(stream, **kwargs):
- """
- Unpack an object from `stream`.
+from .exceptions import BufferFull, ExtraData, FormatError, OutOfData, StackError
+from .ext import ExtType, Timestamp
+
+EX_SKIP = 0
+EX_CONSTRUCT = 1
+EX_READ_ARRAY_HEADER = 2
+EX_READ_MAP_HEADER = 3
+
+TYPE_IMMEDIATE = 0
+TYPE_ARRAY = 1
+TYPE_MAP = 2
+TYPE_RAW = 3
+TYPE_BIN = 4
+TYPE_EXT = 5
+
+DEFAULT_RECURSE_LIMIT = 511
+
+
+def _check_type_strict(obj, t, type=type, tuple=tuple):
+ if type(t) is tuple:
+ return type(obj) in t
+ else:
+ return type(obj) is t
+
+
+def _get_data_from_buffer(obj):
+ view = memoryview(obj)
+ if view.itemsize != 1:
+ raise ValueError("cannot unpack from multi-byte object")
+ return view
- Raises `ExtraData` when `packed` contains extra bytes.
- See :class:`Unpacker` for options.
- """
- unpacker = Unpacker(stream, **kwargs)
- ret = unpacker._fb_unpack()
- if unpacker._fb_got_extradata():
- raise ExtraData(ret, unpacker._fb_get_extradata())
- return ret
def unpackb(packed, **kwargs):
"""
Unpack an object from `packed`.
- Raises `ExtraData` when `packed` contains extra bytes.
+ Raises ``ExtraData`` when *packed* contains extra bytes.
+ Raises ``ValueError`` when *packed* is incomplete.
+ Raises ``FormatError`` when *packed* is not valid msgpack.
+ Raises ``StackError`` when *packed* contains too deeply nested data.
+ Other exceptions can be raised during unpacking.
+
See :class:`Unpacker` for options.
"""
- unpacker = Unpacker(None, **kwargs)
+ unpacker = Unpacker(None, max_buffer_size=len(packed), **kwargs)
unpacker.feed(packed)
try:
- ret = unpacker._fb_unpack()
+ ret = unpacker._unpack()
except OutOfData:
- raise UnpackValueError("Data is not enough.")
- if unpacker._fb_got_extradata():
- raise ExtraData(ret, unpacker._fb_get_extradata())
+ raise ValueError("Unpack failed: incomplete input")
+ except RecursionError:
+ raise StackError
+ if unpacker._got_extradata():
+ raise ExtraData(ret, unpacker._get_extradata())
return ret
-class Unpacker(object):
- """
- Streaming unpacker.
-
- `file_like` is a file-like object having a `.read(n)` method.
- When `Unpacker` is initialized with a `file_like`, `.feed()` is not
- usable.
-
- `read_size` is used for `file_like.read(read_size)`.
- If `use_list` is True (default), msgpack lists are deserialized to Python
- lists. Otherwise they are deserialized to tuples.
-
- `object_hook` is the same as in simplejson. If it is not None, it should
- be callable and Unpacker calls it with a dict argument after deserializing
- a map.
-
- `object_pairs_hook` is the same as in simplejson. If it is not None, it
- should be callable and Unpacker calls it with a list of key-value pairs
- after deserializing a map.
-
- `encoding` is the encoding used for decoding msgpack bytes. If it is
- None (default), msgpack bytes are deserialized to Python bytes.
-
- `unicode_errors` is used for decoding bytes.
-
- `max_buffer_size` limits the buffer size. 0 means INT_MAX (default).
-
- Raises `BufferFull` exception when it is unsufficient.
-
- You should set this parameter when unpacking data from an untrustred source.
-
- example of streaming deserialization from file-like object::
+_NO_FORMAT_USED = ""
+_MSGPACK_HEADERS = {
+ 0xC4: (1, _NO_FORMAT_USED, TYPE_BIN),
+ 0xC5: (2, ">H", TYPE_BIN),
+ 0xC6: (4, ">I", TYPE_BIN),
+ 0xC7: (2, "Bb", TYPE_EXT),
+ 0xC8: (3, ">Hb", TYPE_EXT),
+ 0xC9: (5, ">Ib", TYPE_EXT),
+ 0xCA: (4, ">f"),
+ 0xCB: (8, ">d"),
+ 0xCC: (1, _NO_FORMAT_USED),
+ 0xCD: (2, ">H"),
+ 0xCE: (4, ">I"),
+ 0xCF: (8, ">Q"),
+ 0xD0: (1, "b"),
+ 0xD1: (2, ">h"),
+ 0xD2: (4, ">i"),
+ 0xD3: (8, ">q"),
+ 0xD4: (1, "b1s", TYPE_EXT),
+ 0xD5: (2, "b2s", TYPE_EXT),
+ 0xD6: (4, "b4s", TYPE_EXT),
+ 0xD7: (8, "b8s", TYPE_EXT),
+ 0xD8: (16, "b16s", TYPE_EXT),
+ 0xD9: (1, _NO_FORMAT_USED, TYPE_RAW),
+ 0xDA: (2, ">H", TYPE_RAW),
+ 0xDB: (4, ">I", TYPE_RAW),
+ 0xDC: (2, ">H", TYPE_ARRAY),
+ 0xDD: (4, ">I", TYPE_ARRAY),
+ 0xDE: (2, ">H", TYPE_MAP),
+ 0xDF: (4, ">I", TYPE_MAP),
+}
+
+
+class Unpacker:
+ """Streaming unpacker.
+
+ Arguments:
+
+ :param file_like:
+ File-like object having `.read(n)` method.
+ If specified, unpacker reads serialized data from it and `.feed()` is not usable.
+
+ :param int read_size:
+ Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`)
+
+ :param bool use_list:
+ If true, unpack msgpack array to Python list.
+ Otherwise, unpack to Python tuple. (default: True)
+
+ :param bool raw:
+ If true, unpack msgpack raw to Python bytes.
+ Otherwise, unpack to Python str by decoding with UTF-8 encoding (default).
+
+ :param int timestamp:
+ Control how timestamp type is unpacked:
+
+ 0 - Timestamp
+ 1 - float (Seconds from the EPOCH)
+ 2 - int (Nanoseconds from the EPOCH)
+ 3 - datetime.datetime (UTC).
+
+ :param bool strict_map_key:
+ If true (default), only str or bytes are accepted for map (dict) keys.
+
+ :param object_hook:
+ When specified, it should be callable.
+ Unpacker calls it with a dict argument after unpacking msgpack map.
+ (See also simplejson)
+
+ :param object_pairs_hook:
+ When specified, it should be callable.
+ Unpacker calls it with a list of key-value pairs after unpacking msgpack map.
+ (See also simplejson)
+
+ :param str unicode_errors:
+ The error handler for decoding unicode. (default: 'strict')
+ This option should be used only when you have msgpack data which
+ contains invalid UTF-8 string.
+
+ :param int max_buffer_size:
+ Limits size of data waiting unpacked. 0 means 2**31-1.
+ The default value is 100*1024*1024 (100MiB).
+ Raises `BufferFull` exception when it is insufficient.
+ You should set this parameter when unpacking data from untrusted source.
+
+ :param int max_str_len:
+ Deprecated, use *max_buffer_size* instead.
+ Limits max length of str. (default: max_buffer_size)
+
+ :param int max_bin_len:
+ Deprecated, use *max_buffer_size* instead.
+ Limits max length of bin. (default: max_buffer_size)
+
+ :param int max_array_len:
+ Limits max length of array.
+ (default: max_buffer_size)
+
+ :param int max_map_len:
+ Limits max length of map.
+ (default: max_buffer_size//2)
+
+ :param int max_ext_len:
+ Deprecated, use *max_buffer_size* instead.
+ Limits max size of ext type. (default: max_buffer_size)
+
+ Example of streaming deserialization from a file-like object::
unpacker = Unpacker(file_like)
for o in unpacker:
- do_something(o)
+ process(o)
- example of streaming deserialization from socket::
+ Example of streaming deserialization from a socket::
- unpacker = Unapcker()
- while 1:
- buf = sock.recv(1024*2)
+ unpacker = Unpacker()
+ while True:
+ buf = sock.recv(1024**2)
if not buf:
break
unpacker.feed(buf)
for o in unpacker:
- do_something(o)
+ process(o)
+
+ Raises ``ExtraData`` when *packed* contains extra bytes.
+ Raises ``OutOfData`` when *packed* is incomplete.
+ Raises ``FormatError`` when *packed* is not valid msgpack.
+ Raises ``StackError`` when *packed* contains too deeply nested data.
+ Other exceptions can be raised during unpacking.
"""
- def __init__(self, file_like=None, read_size=0, use_list=True,
- object_hook=None, object_pairs_hook=None, list_hook=None,
- encoding=None, unicode_errors='strict', max_buffer_size=0):
+ def __init__(
+ self,
+ file_like=None,
+ *,
+ read_size=0,
+ use_list=True,
+ raw=False,
+ timestamp=0,
+ strict_map_key=True,
+ object_hook=None,
+ object_pairs_hook=None,
+ list_hook=None,
+ unicode_errors=None,
+ max_buffer_size=100 * 1024 * 1024,
+ ext_hook=ExtType,
+ max_str_len=-1,
+ max_bin_len=-1,
+ max_array_len=-1,
+ max_map_len=-1,
+ max_ext_len=-1,
+ ):
+ if unicode_errors is None:
+ unicode_errors = "strict"
+
if file_like is None:
- self._fb_feeding = True
+ self._feeding = True
else:
if not callable(file_like.read):
- raise ValueError("`file_like.read` must be callable")
+ raise TypeError("`file_like.read` must be callable")
self.file_like = file_like
- self._fb_feeding = False
- self._fb_buffers = []
- self._fb_buf_o = 0
- self._fb_buf_i = 0
- self._fb_buf_n = 0
- self._max_buffer_size = (2**31-1 if max_buffer_size == 0
- else max_buffer_size)
- self._read_size = (read_size if read_size != 0
- else min(self._max_buffer_size, 2048))
+ self._feeding = False
+
+ #: array of bytes fed.
+ self._buffer = bytearray()
+ #: Which position we currently reads
+ self._buff_i = 0
+
+ # When Unpacker is used as an iterable, between the calls to next(),
+ # the buffer is not "consumed" completely, for efficiency sake.
+ # Instead, it is done sloppily. To make sure we raise BufferFull at
+ # the correct moments, we have to keep track of how sloppy we were.
+ # Furthermore, when the buffer is incomplete (that is: in the case
+ # we raise an OutOfData) we need to rollback the buffer to the correct
+ # state, which _buf_checkpoint records.
+ self._buf_checkpoint = 0
+
+ if not max_buffer_size:
+ max_buffer_size = 2**31 - 1
+ if max_str_len == -1:
+ max_str_len = max_buffer_size
+ if max_bin_len == -1:
+ max_bin_len = max_buffer_size
+ if max_array_len == -1:
+ max_array_len = max_buffer_size
+ if max_map_len == -1:
+ max_map_len = max_buffer_size // 2
+ if max_ext_len == -1:
+ max_ext_len = max_buffer_size
+
+ self._max_buffer_size = max_buffer_size
if read_size > self._max_buffer_size:
raise ValueError("read_size must be smaller than max_buffer_size")
- self._encoding = encoding
+ self._read_size = read_size or min(self._max_buffer_size, 16 * 1024)
+ self._raw = bool(raw)
+ self._strict_map_key = bool(strict_map_key)
self._unicode_errors = unicode_errors
self._use_list = use_list
+ if not (0 <= timestamp <= 3):
+ raise ValueError("timestamp must be 0..3")
+ self._timestamp = timestamp
self._list_hook = list_hook
self._object_hook = object_hook
self._object_pairs_hook = object_pairs_hook
+ self._ext_hook = ext_hook
+ self._max_str_len = max_str_len
+ self._max_bin_len = max_bin_len
+ self._max_array_len = max_array_len
+ self._max_map_len = max_map_len
+ self._max_ext_len = max_ext_len
+ self._stream_offset = 0
if list_hook is not None and not callable(list_hook):
- raise ValueError('`list_hook` is not callable')
+ raise TypeError("`list_hook` is not callable")
if object_hook is not None and not callable(object_hook):
- raise ValueError('`object_hook` is not callable')
+ raise TypeError("`object_hook` is not callable")
if object_pairs_hook is not None and not callable(object_pairs_hook):
- raise ValueError('`object_pairs_hook` is not callable')
+ raise TypeError("`object_pairs_hook` is not callable")
if object_hook is not None and object_pairs_hook is not None:
- raise ValueError("object_pairs_hook and object_hook are mutually "
- "exclusive")
+ raise TypeError("object_pairs_hook and object_hook are mutually exclusive")
+ if not callable(ext_hook):
+ raise TypeError("`ext_hook` is not callable")
def feed(self, next_bytes):
- if isinstance(next_bytes, array.array):
- next_bytes = next_bytes.tostring()
- assert self._fb_feeding
- if self._fb_buf_n + len(next_bytes) > self._max_buffer_size:
+ assert self._feeding
+ view = _get_data_from_buffer(next_bytes)
+ if len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size:
raise BufferFull
- self._fb_buf_n += len(next_bytes)
- self._fb_buffers.append(next_bytes)
-
- def _fb_consume(self):
- self._fb_buffers = self._fb_buffers[self._fb_buf_i:]
- if self._fb_buffers:
- self._fb_buffers[0] = self._fb_buffers[0][self._fb_buf_o:]
- self._fb_buf_o = 0
- self._fb_buf_i = 0
- self._fb_buf_n = sum(map(len, self._fb_buffers))
-
- def _fb_got_extradata(self):
- if self._fb_buf_i != len(self._fb_buffers):
- return True
- if self._fb_feeding:
- return False
- if not self.file_like:
- return False
- if self.file_like.read(1):
- return True
- return False
- def __iter__(self):
- return self
+ # Strip buffer before checkpoint before reading file.
+ if self._buf_checkpoint > 0:
+ del self._buffer[: self._buf_checkpoint]
+ self._buff_i -= self._buf_checkpoint
+ self._buf_checkpoint = 0
+
+ # Use extend here: INPLACE_ADD += doesn't reliably typecast memoryview in jython
+ self._buffer.extend(view)
+ view.release()
+
+ def _consume(self):
+ """Gets rid of the used parts of the buffer."""
+ self._stream_offset += self._buff_i - self._buf_checkpoint
+ self._buf_checkpoint = self._buff_i
+
+ def _got_extradata(self):
+ return self._buff_i < len(self._buffer)
+
+ def _get_extradata(self):
+ return self._buffer[self._buff_i :]
def read_bytes(self, n):
- return self._fb_read(n)
-
- def _fb_rollback(self):
- self._fb_buf_i = 0
- self._fb_buf_o = 0
-
- def _fb_get_extradata(self):
- bufs = self._fb_buffers[self._fb_buf_i:]
- if bufs:
- bufs[0] = bufs[0][self._fb_buf_o:]
- return b''.join(bufs)
-
- def _fb_read(self, n, write_bytes=None):
- if (write_bytes is None and self._fb_buf_i < len(self._fb_buffers)
- and self._fb_buf_o + n < len(self._fb_buffers[self._fb_buf_i])):
- self._fb_buf_o += n
- return self._fb_buffers[self._fb_buf_i][
- self._fb_buf_o-n:self._fb_buf_o]
- ret = b''
- while len(ret) != n:
- if self._fb_buf_i == len(self._fb_buffers):
- if self._fb_feeding:
- break
- tmp = self.file_like.read(self._read_size)
- if not tmp:
- break
- self._fb_buffers.append(tmp)
- continue
- sliced = n - len(ret)
- ret += self._fb_buffers[self._fb_buf_i][
- self._fb_buf_o:self._fb_buf_o + sliced]
- self._fb_buf_o += sliced
- if self._fb_buf_o >= len(self._fb_buffers[self._fb_buf_i]):
- self._fb_buf_o = 0
- self._fb_buf_i += 1
- if len(ret) != n:
- self._fb_rollback()
- raise OutOfData
- if write_bytes is not None:
- write_bytes(ret)
+ ret = self._read(n, raise_outofdata=False)
+ self._consume()
+ return ret
+
+ def _read(self, n, raise_outofdata=True):
+ # (int) -> bytearray
+ self._reserve(n, raise_outofdata=raise_outofdata)
+ i = self._buff_i
+ ret = self._buffer[i : i + n]
+ self._buff_i = i + len(ret)
return ret
- def _read_header(self, execute=EX_CONSTRUCT, write_bytes=None):
+ def _reserve(self, n, raise_outofdata=True):
+ remain_bytes = len(self._buffer) - self._buff_i - n
+
+ # Fast path: buffer has n bytes already
+ if remain_bytes >= 0:
+ return
+
+ if self._feeding:
+ self._buff_i = self._buf_checkpoint
+ raise OutOfData
+
+ # Strip buffer before checkpoint before reading file.
+ if self._buf_checkpoint > 0:
+ del self._buffer[: self._buf_checkpoint]
+ self._buff_i -= self._buf_checkpoint
+ self._buf_checkpoint = 0
+
+ # Read from file
+ remain_bytes = -remain_bytes
+ if remain_bytes + len(self._buffer) > self._max_buffer_size:
+ raise BufferFull
+ while remain_bytes > 0:
+ to_read_bytes = max(self._read_size, remain_bytes)
+ read_data = self.file_like.read(to_read_bytes)
+ if not read_data:
+ break
+ assert isinstance(read_data, bytes)
+ self._buffer += read_data
+ remain_bytes -= len(read_data)
+
+ if len(self._buffer) < n + self._buff_i and raise_outofdata:
+ self._buff_i = 0 # rollback
+ raise OutOfData
+
+ def _read_header(self):
typ = TYPE_IMMEDIATE
n = 0
obj = None
- c = self._fb_read(1, write_bytes)
- b = ord(c)
- if b & 0b10000000 == 0:
+ self._reserve(1)
+ b = self._buffer[self._buff_i]
+ self._buff_i += 1
+ if b & 0b10000000 == 0:
obj = b
elif b & 0b11100000 == 0b11100000:
- obj = struct.unpack("b", c)[0]
+ obj = -1 - (b ^ 0xFF)
elif b & 0b11100000 == 0b10100000:
n = b & 0b00011111
- obj = self._fb_read(n, write_bytes)
typ = TYPE_RAW
+ if n > self._max_str_len:
+ raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})")
+ obj = self._read(n)
elif b & 0b11110000 == 0b10010000:
n = b & 0b00001111
typ = TYPE_ARRAY
+ if n > self._max_array_len:
+ raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})")
elif b & 0b11110000 == 0b10000000:
n = b & 0b00001111
typ = TYPE_MAP
- elif b == 0xc0:
+ if n > self._max_map_len:
+ raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})")
+ elif b == 0xC0:
obj = None
- elif b == 0xc2:
+ elif b == 0xC2:
obj = False
- elif b == 0xc3:
+ elif b == 0xC3:
obj = True
- elif b == 0xca:
- obj = struct.unpack(">f", self._fb_read(4, write_bytes))[0]
- elif b == 0xcb:
- obj = struct.unpack(">d", self._fb_read(8, write_bytes))[0]
- elif b == 0xcc:
- obj = struct.unpack("B", self._fb_read(1, write_bytes))[0]
- elif b == 0xcd:
- obj = struct.unpack(">H", self._fb_read(2, write_bytes))[0]
- elif b == 0xce:
- obj = struct.unpack(">I", self._fb_read(4, write_bytes))[0]
- elif b == 0xcf:
- obj = struct.unpack(">Q", self._fb_read(8, write_bytes))[0]
- elif b == 0xd0:
- obj = struct.unpack("b", self._fb_read(1, write_bytes))[0]
- elif b == 0xd1:
- obj = struct.unpack(">h", self._fb_read(2, write_bytes))[0]
- elif b == 0xd2:
- obj = struct.unpack(">i", self._fb_read(4, write_bytes))[0]
- elif b == 0xd3:
- obj = struct.unpack(">q", self._fb_read(8, write_bytes))[0]
- elif b == 0xda:
- n = struct.unpack(">H", self._fb_read(2, write_bytes))[0]
- obj = self._fb_read(n, write_bytes)
- typ = TYPE_RAW
- elif b == 0xdb:
- n = struct.unpack(">I", self._fb_read(4, write_bytes))[0]
- obj = self._fb_read(n, write_bytes)
- typ = TYPE_RAW
- elif b == 0xdc:
- n = struct.unpack(">H", self._fb_read(2, write_bytes))[0]
- typ = TYPE_ARRAY
- elif b == 0xdd:
- n = struct.unpack(">I", self._fb_read(4, write_bytes))[0]
- typ = TYPE_ARRAY
- elif b == 0xde:
- n = struct.unpack(">H", self._fb_read(2, write_bytes))[0]
- typ = TYPE_MAP
- elif b == 0xdf:
- n = struct.unpack(">I", self._fb_read(4, write_bytes))[0]
- typ = TYPE_MAP
+ elif 0xC4 <= b <= 0xC6:
+ size, fmt, typ = _MSGPACK_HEADERS[b]
+ self._reserve(size)
+ if len(fmt) > 0:
+ n = struct.unpack_from(fmt, self._buffer, self._buff_i)[0]
+ else:
+ n = self._buffer[self._buff_i]
+ self._buff_i += size
+ if n > self._max_bin_len:
+ raise ValueError(f"{n} exceeds max_bin_len({self._max_bin_len})")
+ obj = self._read(n)
+ elif 0xC7 <= b <= 0xC9:
+ size, fmt, typ = _MSGPACK_HEADERS[b]
+ self._reserve(size)
+ L, n = struct.unpack_from(fmt, self._buffer, self._buff_i)
+ self._buff_i += size
+ if L > self._max_ext_len:
+ raise ValueError(f"{L} exceeds max_ext_len({self._max_ext_len})")
+ obj = self._read(L)
+ elif 0xCA <= b <= 0xD3:
+ size, fmt = _MSGPACK_HEADERS[b]
+ self._reserve(size)
+ if len(fmt) > 0:
+ obj = struct.unpack_from(fmt, self._buffer, self._buff_i)[0]
+ else:
+ obj = self._buffer[self._buff_i]
+ self._buff_i += size
+ elif 0xD4 <= b <= 0xD8:
+ size, fmt, typ = _MSGPACK_HEADERS[b]
+ if self._max_ext_len < size:
+ raise ValueError(f"{size} exceeds max_ext_len({self._max_ext_len})")
+ self._reserve(size + 1)
+ n, obj = struct.unpack_from(fmt, self._buffer, self._buff_i)
+ self._buff_i += size + 1
+ elif 0xD9 <= b <= 0xDB:
+ size, fmt, typ = _MSGPACK_HEADERS[b]
+ self._reserve(size)
+ if len(fmt) > 0:
+ (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i)
+ else:
+ n = self._buffer[self._buff_i]
+ self._buff_i += size
+ if n > self._max_str_len:
+ raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})")
+ obj = self._read(n)
+ elif 0xDC <= b <= 0xDD:
+ size, fmt, typ = _MSGPACK_HEADERS[b]
+ self._reserve(size)
+ (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i)
+ self._buff_i += size
+ if n > self._max_array_len:
+ raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})")
+ elif 0xDE <= b <= 0xDF:
+ size, fmt, typ = _MSGPACK_HEADERS[b]
+ self._reserve(size)
+ (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i)
+ self._buff_i += size
+ if n > self._max_map_len:
+ raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})")
else:
- raise UnpackValueError("Unknown header: 0x%x" % b)
+ raise FormatError("Unknown header: 0x%x" % b)
return typ, n, obj
- def _fb_unpack(self, execute=EX_CONSTRUCT, write_bytes=None):
- typ, n, obj = self._read_header(execute, write_bytes)
+ def _unpack(self, execute=EX_CONSTRUCT):
+ typ, n, obj = self._read_header()
if execute == EX_READ_ARRAY_HEADER:
if typ != TYPE_ARRAY:
- raise UnpackValueError("Expected array")
+ raise ValueError("Expected array")
return n
if execute == EX_READ_MAP_HEADER:
if typ != TYPE_MAP:
- raise UnpackValueError("Expected map")
+ raise ValueError("Expected map")
return n
# TODO should we eliminate the recursion?
if typ == TYPE_ARRAY:
if execute == EX_SKIP:
- for i in xrange(n):
+ for i in range(n):
# TODO check whether we need to call `list_hook`
- self._fb_unpack(EX_SKIP, write_bytes)
+ self._unpack(EX_SKIP)
return
ret = newlist_hint(n)
- for i in xrange(n):
- ret.append(self._fb_unpack(EX_CONSTRUCT, write_bytes))
+ for i in range(n):
+ ret.append(self._unpack(EX_CONSTRUCT))
if self._list_hook is not None:
ret = self._list_hook(ret)
# TODO is the interaction between `list_hook` and `use_list` ok?
return ret if self._use_list else tuple(ret)
if typ == TYPE_MAP:
if execute == EX_SKIP:
- for i in xrange(n):
+ for i in range(n):
# TODO check whether we need to call hooks
- self._fb_unpack(EX_SKIP, write_bytes)
- self._fb_unpack(EX_SKIP, write_bytes)
+ self._unpack(EX_SKIP)
+ self._unpack(EX_SKIP)
return
if self._object_pairs_hook is not None:
ret = self._object_pairs_hook(
- (self._fb_unpack(EX_CONSTRUCT, write_bytes),
- self._fb_unpack(EX_CONSTRUCT, write_bytes))
- for _ in xrange(n)
- )
+ (self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT)) for _ in range(n)
+ )
else:
ret = {}
- for _ in xrange(n):
- key = self._fb_unpack(EX_CONSTRUCT, write_bytes)
- ret[key] = self._fb_unpack(EX_CONSTRUCT, write_bytes)
+ for _ in range(n):
+ key = self._unpack(EX_CONSTRUCT)
+ if self._strict_map_key and type(key) not in (str, bytes):
+ raise ValueError("%s is not allowed for map key" % str(type(key)))
+ if isinstance(key, str):
+ key = sys.intern(key)
+ ret[key] = self._unpack(EX_CONSTRUCT)
if self._object_hook is not None:
ret = self._object_hook(ret)
return ret
if execute == EX_SKIP:
return
if typ == TYPE_RAW:
- if self._encoding is not None:
- obj = obj.decode(self._encoding, self._unicode_errors)
+ if self._raw:
+ obj = bytes(obj)
+ else:
+ obj = obj.decode("utf_8", self._unicode_errors)
return obj
+ if typ == TYPE_BIN:
+ return bytes(obj)
+ if typ == TYPE_EXT:
+ if n == -1: # timestamp
+ ts = Timestamp.from_bytes(bytes(obj))
+ if self._timestamp == 1:
+ return ts.to_unix()
+ elif self._timestamp == 2:
+ return ts.to_unix_nano()
+ elif self._timestamp == 3:
+ return ts.to_datetime()
+ else:
+ return ts
+ else:
+ return self._ext_hook(n, bytes(obj))
assert typ == TYPE_IMMEDIATE
return obj
- def next(self):
+ def __iter__(self):
+ return self
+
+ def __next__(self):
try:
- ret = self._fb_unpack(EX_CONSTRUCT, None)
- self._fb_consume()
+ ret = self._unpack(EX_CONSTRUCT)
+ self._consume()
return ret
except OutOfData:
+ self._consume()
raise StopIteration
- __next__ = next
+ except RecursionError:
+ raise StackError
+
+ next = __next__
- def skip(self, write_bytes=None):
- self._fb_unpack(EX_SKIP, write_bytes)
- self._fb_consume()
+ def skip(self):
+ self._unpack(EX_SKIP)
+ self._consume()
- def unpack(self, write_bytes=None):
- ret = self._fb_unpack(EX_CONSTRUCT, write_bytes)
- self._fb_consume()
+ def unpack(self):
+ try:
+ ret = self._unpack(EX_CONSTRUCT)
+ except RecursionError:
+ raise StackError
+ self._consume()
return ret
- def read_array_header(self, write_bytes=None):
- ret = self._fb_unpack(EX_READ_ARRAY_HEADER, write_bytes)
- self._fb_consume()
+ def read_array_header(self):
+ ret = self._unpack(EX_READ_ARRAY_HEADER)
+ self._consume()
return ret
- def read_map_header(self, write_bytes=None):
- ret = self._fb_unpack(EX_READ_MAP_HEADER, write_bytes)
- self._fb_consume()
+ def read_map_header(self):
+ ret = self._unpack(EX_READ_MAP_HEADER)
+ self._consume()
return ret
+ def tell(self):
+ return self._stream_offset
-class Packer(object):
+
+class Packer:
"""
MessagePack Packer
- usage:
+ Usage::
packer = Packer()
astream.write(packer.pack(a))
@@ -434,159 +613,317 @@ class Packer(object):
Packer's constructor has some keyword arguments:
- :param callable default:
+ :param default:
+ When specified, it should be callable.
Convert user type to builtin type that Packer supports.
See also simplejson's document.
- :param str encoding:
- Convert unicode to bytes with this encoding. (default: 'utf-8')
- :param str unicode_erros:
- Error handler for encoding unicode. (default: 'strict')
+
:param bool use_single_float:
Use single precision float type for float. (default: False)
+
:param bool autoreset:
- Reset buffer after each pack and return it's content as `bytes`. (default: True).
+ Reset buffer after each pack and return its content as `bytes`. (default: True).
If set this to false, use `bytes()` to get content and `.reset()` to clear buffer.
+
+ :param bool use_bin_type:
+ Use bin type introduced in msgpack spec 2.0 for bytes.
+ It also enables str8 type for unicode. (default: True)
+
+ :param bool strict_types:
+ If set to true, types will be checked to be exact. Derived classes
+ from serializable types will not be serialized and will be
+ treated as unsupported type and forwarded to default.
+ Additionally tuples will not be serialized as lists.
+ This is useful when trying to implement accurate serialization
+ for python types.
+
+ :param bool datetime:
+ If set to true, datetime with tzinfo is packed into Timestamp type.
+ Note that the tzinfo is stripped in the timestamp.
+ You can get UTC datetime with `timestamp=3` option of the Unpacker.
+
+ :param str unicode_errors:
+ The error handler for encoding unicode. (default: 'strict')
+ DO NOT USE THIS!! This option is kept for very specific usage.
+
+ :param int buf_size:
+ Internal buffer size. This option is used only for C implementation.
"""
- def __init__(self, default=None, encoding='utf-8', unicode_errors='strict',
- use_single_float=False, autoreset=True):
+
+ def __init__(
+ self,
+ *,
+ default=None,
+ use_single_float=False,
+ autoreset=True,
+ use_bin_type=True,
+ strict_types=False,
+ datetime=False,
+ unicode_errors=None,
+ buf_size=None,
+ ):
+ self._strict_types = strict_types
self._use_float = use_single_float
self._autoreset = autoreset
- self._encoding = encoding
- self._unicode_errors = unicode_errors
- self._buffer = StringIO()
- if default is not None:
- if not callable(default):
- raise TypeError("default must be callable")
+ self._use_bin_type = use_bin_type
+ self._buffer = BytesIO()
+ self._datetime = bool(datetime)
+ self._unicode_errors = unicode_errors or "strict"
+ if default is not None and not callable(default):
+ raise TypeError("default must be callable")
self._default = default
- def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance):
- if nest_limit < 0:
- raise PackValueError("recursion limit exceeded")
- if obj is None:
- return self._buffer.write(b"\xc0")
- if isinstance(obj, bool):
- if obj:
- return self._buffer.write(b"\xc3")
- return self._buffer.write(b"\xc2")
- if isinstance(obj, int_types):
- if 0 <= obj < 0x80:
- return self._buffer.write(struct.pack("B", obj))
- if -0x20 <= obj < 0:
- return self._buffer.write(struct.pack("b", obj))
- if 0x80 <= obj <= 0xff:
- return self._buffer.write(struct.pack("BB", 0xcc, obj))
- if -0x80 <= obj < 0:
- return self._buffer.write(struct.pack(">Bb", 0xd0, obj))
- if 0xff < obj <= 0xffff:
- return self._buffer.write(struct.pack(">BH", 0xcd, obj))
- if -0x8000 <= obj < -0x80:
- return self._buffer.write(struct.pack(">Bh", 0xd1, obj))
- if 0xffff < obj <= 0xffffffff:
- return self._buffer.write(struct.pack(">BI", 0xce, obj))
- if -0x80000000 <= obj < -0x8000:
- return self._buffer.write(struct.pack(">Bi", 0xd2, obj))
- if 0xffffffff < obj <= 0xffffffffffffffff:
- return self._buffer.write(struct.pack(">BQ", 0xcf, obj))
- if -0x8000000000000000 <= obj < -0x80000000:
- return self._buffer.write(struct.pack(">Bq", 0xd3, obj))
- raise PackValueError("Integer value out of range")
- if isinstance(obj, (Unicode, bytes)):
- if isinstance(obj, Unicode):
- if self._encoding is None:
- raise TypeError(
- "Can't encode unicode string: "
- "no encoding is specified")
- obj = obj.encode(self._encoding, self._unicode_errors)
- n = len(obj)
- if n <= 0x1f:
- self._buffer.write(struct.pack('B', 0xa0 + n))
+ def _pack(
+ self,
+ obj,
+ nest_limit=DEFAULT_RECURSE_LIMIT,
+ check=isinstance,
+ check_type_strict=_check_type_strict,
+ ):
+ default_used = False
+ if self._strict_types:
+ check = check_type_strict
+ list_types = list
+ else:
+ list_types = (list, tuple)
+ while True:
+ if nest_limit < 0:
+ raise ValueError("recursion limit exceeded")
+ if obj is None:
+ return self._buffer.write(b"\xc0")
+ if check(obj, bool):
+ if obj:
+ return self._buffer.write(b"\xc3")
+ return self._buffer.write(b"\xc2")
+ if check(obj, int):
+ if 0 <= obj < 0x80:
+ return self._buffer.write(struct.pack("B", obj))
+ if -0x20 <= obj < 0:
+ return self._buffer.write(struct.pack("b", obj))
+ if 0x80 <= obj <= 0xFF:
+ return self._buffer.write(struct.pack("BB", 0xCC, obj))
+ if -0x80 <= obj < 0:
+ return self._buffer.write(struct.pack(">Bb", 0xD0, obj))
+ if 0xFF < obj <= 0xFFFF:
+ return self._buffer.write(struct.pack(">BH", 0xCD, obj))
+ if -0x8000 <= obj < -0x80:
+ return self._buffer.write(struct.pack(">Bh", 0xD1, obj))
+ if 0xFFFF < obj <= 0xFFFFFFFF:
+ return self._buffer.write(struct.pack(">BI", 0xCE, obj))
+ if -0x80000000 <= obj < -0x8000:
+ return self._buffer.write(struct.pack(">Bi", 0xD2, obj))
+ if 0xFFFFFFFF < obj <= 0xFFFFFFFFFFFFFFFF:
+ return self._buffer.write(struct.pack(">BQ", 0xCF, obj))
+ if -0x8000000000000000 <= obj < -0x80000000:
+ return self._buffer.write(struct.pack(">Bq", 0xD3, obj))
+ if not default_used and self._default is not None:
+ obj = self._default(obj)
+ default_used = True
+ continue
+ raise OverflowError("Integer value out of range")
+ if check(obj, (bytes, bytearray)):
+ n = len(obj)
+ if n >= 2**32:
+ raise ValueError("%s is too large" % type(obj).__name__)
+ self._pack_bin_header(n)
return self._buffer.write(obj)
- if n <= 0xffff:
- self._buffer.write(struct.pack(">BH", 0xda, n))
+ if check(obj, str):
+ obj = obj.encode("utf-8", self._unicode_errors)
+ n = len(obj)
+ if n >= 2**32:
+ raise ValueError("String is too large")
+ self._pack_raw_header(n)
return self._buffer.write(obj)
- if n <= 0xffffffff:
- self._buffer.write(struct.pack(">BI", 0xdb, n))
+ if check(obj, memoryview):
+ n = obj.nbytes
+ if n >= 2**32:
+ raise ValueError("Memoryview is too large")
+ self._pack_bin_header(n)
return self._buffer.write(obj)
- raise PackValueError("String is too large")
- if isinstance(obj, float):
- if self._use_float:
- return self._buffer.write(struct.pack(">Bf", 0xca, obj))
- return self._buffer.write(struct.pack(">Bd", 0xcb, obj))
- if isinstance(obj, list) or isinstance(obj, tuple):
- n = len(obj)
- self._fb_pack_array_header(n)
- for i in xrange(n):
- self._pack(obj[i], nest_limit - 1)
- return
- if isinstance(obj, dict):
- return self._fb_pack_map_pairs(len(obj), dict_iteritems(obj),
- nest_limit - 1)
- if self._default is not None:
- return self._pack(self._default(obj), nest_limit - 1)
- raise TypeError("Cannot serialize %r" % obj)
+ if check(obj, float):
+ if self._use_float:
+ return self._buffer.write(struct.pack(">Bf", 0xCA, obj))
+ return self._buffer.write(struct.pack(">Bd", 0xCB, obj))
+ if check(obj, (ExtType, Timestamp)):
+ if check(obj, Timestamp):
+ code = -1
+ data = obj.to_bytes()
+ else:
+ code = obj.code
+ data = obj.data
+ assert isinstance(code, int)
+ assert isinstance(data, bytes)
+ L = len(data)
+ if L == 1:
+ self._buffer.write(b"\xd4")
+ elif L == 2:
+ self._buffer.write(b"\xd5")
+ elif L == 4:
+ self._buffer.write(b"\xd6")
+ elif L == 8:
+ self._buffer.write(b"\xd7")
+ elif L == 16:
+ self._buffer.write(b"\xd8")
+ elif L <= 0xFF:
+ self._buffer.write(struct.pack(">BB", 0xC7, L))
+ elif L <= 0xFFFF:
+ self._buffer.write(struct.pack(">BH", 0xC8, L))
+ else:
+ self._buffer.write(struct.pack(">BI", 0xC9, L))
+ self._buffer.write(struct.pack("b", code))
+ self._buffer.write(data)
+ return
+ if check(obj, list_types):
+ n = len(obj)
+ self._pack_array_header(n)
+ for i in range(n):
+ self._pack(obj[i], nest_limit - 1)
+ return
+ if check(obj, dict):
+ return self._pack_map_pairs(len(obj), obj.items(), nest_limit - 1)
+
+ if self._datetime and check(obj, _DateTime) and obj.tzinfo is not None:
+ obj = Timestamp.from_datetime(obj)
+ default_used = 1
+ continue
+
+ if not default_used and self._default is not None:
+ obj = self._default(obj)
+ default_used = 1
+ continue
+
+ if self._datetime and check(obj, _DateTime):
+ raise ValueError(f"Cannot serialize {obj!r} where tzinfo=None")
+
+ raise TypeError(f"Cannot serialize {obj!r}")
def pack(self, obj):
- self._pack(obj)
- ret = self._buffer.getvalue()
+ try:
+ self._pack(obj)
+ except:
+ self._buffer = BytesIO() # force reset
+ raise
if self._autoreset:
- self._buffer = StringIO()
- elif USING_STRINGBUILDER:
- self._buffer = StringIO(ret)
- return ret
+ ret = self._buffer.getvalue()
+ self._buffer = BytesIO()
+ return ret
def pack_map_pairs(self, pairs):
- self._fb_pack_map_pairs(len(pairs), pairs)
- ret = self._buffer.getvalue()
+ self._pack_map_pairs(len(pairs), pairs)
if self._autoreset:
- self._buffer = StringIO()
- elif USING_STRINGBUILDER:
- self._buffer = StringIO(ret)
- return ret
+ ret = self._buffer.getvalue()
+ self._buffer = BytesIO()
+ return ret
def pack_array_header(self, n):
- self._fb_pack_array_header(n)
- ret = self._buffer.getvalue()
+ if n >= 2**32:
+ raise ValueError
+ self._pack_array_header(n)
if self._autoreset:
- self._buffer = StringIO()
- elif USING_STRINGBUILDER:
- self._buffer = StringIO(ret)
- return ret
+ ret = self._buffer.getvalue()
+ self._buffer = BytesIO()
+ return ret
def pack_map_header(self, n):
- self._fb_pack_map_header(n)
- ret = self._buffer.getvalue()
+ if n >= 2**32:
+ raise ValueError
+ self._pack_map_header(n)
if self._autoreset:
- self._buffer = StringIO()
- elif USING_STRINGBUILDER:
- self._buffer = StringIO(ret)
- return ret
+ ret = self._buffer.getvalue()
+ self._buffer = BytesIO()
+ return ret
- def _fb_pack_array_header(self, n):
- if n <= 0x0f:
- return self._buffer.write(struct.pack('B', 0x90 + n))
- if n <= 0xffff:
- return self._buffer.write(struct.pack(">BH", 0xdc, n))
- if n <= 0xffffffff:
- return self._buffer.write(struct.pack(">BI", 0xdd, n))
- raise PackValueError("Array is too large")
-
- def _fb_pack_map_header(self, n):
- if n <= 0x0f:
- return self._buffer.write(struct.pack('B', 0x80 + n))
- if n <= 0xffff:
- return self._buffer.write(struct.pack(">BH", 0xde, n))
- if n <= 0xffffffff:
- return self._buffer.write(struct.pack(">BI", 0xdf, n))
- raise PackValueError("Dict is too large")
-
- def _fb_pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT):
- self._fb_pack_map_header(n)
- for (k, v) in pairs:
+ def pack_ext_type(self, typecode, data):
+ if not isinstance(typecode, int):
+ raise TypeError("typecode must have int type.")
+ if not 0 <= typecode <= 127:
+ raise ValueError("typecode should be 0-127")
+ if not isinstance(data, bytes):
+ raise TypeError("data must have bytes type")
+ L = len(data)
+ if L > 0xFFFFFFFF:
+ raise ValueError("Too large data")
+ if L == 1:
+ self._buffer.write(b"\xd4")
+ elif L == 2:
+ self._buffer.write(b"\xd5")
+ elif L == 4:
+ self._buffer.write(b"\xd6")
+ elif L == 8:
+ self._buffer.write(b"\xd7")
+ elif L == 16:
+ self._buffer.write(b"\xd8")
+ elif L <= 0xFF:
+ self._buffer.write(b"\xc7" + struct.pack("B", L))
+ elif L <= 0xFFFF:
+ self._buffer.write(b"\xc8" + struct.pack(">H", L))
+ else:
+ self._buffer.write(b"\xc9" + struct.pack(">I", L))
+ self._buffer.write(struct.pack("B", typecode))
+ self._buffer.write(data)
+
+ def _pack_array_header(self, n):
+ if n <= 0x0F:
+ return self._buffer.write(struct.pack("B", 0x90 + n))
+ if n <= 0xFFFF:
+ return self._buffer.write(struct.pack(">BH", 0xDC, n))
+ if n <= 0xFFFFFFFF:
+ return self._buffer.write(struct.pack(">BI", 0xDD, n))
+ raise ValueError("Array is too large")
+
+ def _pack_map_header(self, n):
+ if n <= 0x0F:
+ return self._buffer.write(struct.pack("B", 0x80 + n))
+ if n <= 0xFFFF:
+ return self._buffer.write(struct.pack(">BH", 0xDE, n))
+ if n <= 0xFFFFFFFF:
+ return self._buffer.write(struct.pack(">BI", 0xDF, n))
+ raise ValueError("Dict is too large")
+
+ def _pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT):
+ self._pack_map_header(n)
+ for k, v in pairs:
self._pack(k, nest_limit - 1)
self._pack(v, nest_limit - 1)
+ def _pack_raw_header(self, n):
+ if n <= 0x1F:
+ self._buffer.write(struct.pack("B", 0xA0 + n))
+ elif self._use_bin_type and n <= 0xFF:
+ self._buffer.write(struct.pack(">BB", 0xD9, n))
+ elif n <= 0xFFFF:
+ self._buffer.write(struct.pack(">BH", 0xDA, n))
+ elif n <= 0xFFFFFFFF:
+ self._buffer.write(struct.pack(">BI", 0xDB, n))
+ else:
+ raise ValueError("Raw is too large")
+
+ def _pack_bin_header(self, n):
+ if not self._use_bin_type:
+ return self._pack_raw_header(n)
+ elif n <= 0xFF:
+ return self._buffer.write(struct.pack(">BB", 0xC4, n))
+ elif n <= 0xFFFF:
+ return self._buffer.write(struct.pack(">BH", 0xC5, n))
+ elif n <= 0xFFFFFFFF:
+ return self._buffer.write(struct.pack(">BI", 0xC6, n))
+ else:
+ raise ValueError("Bin is too large")
+
def bytes(self):
+ """Return internal buffer contents as bytes object"""
return self._buffer.getvalue()
def reset(self):
- self._buffer = StringIO()
+ """Reset internal buffer.
+
+ This method is useful only when autoreset=False.
+ """
+ self._buffer = BytesIO()
+
+ def getbuffer(self):
+ """Return view of internal buffer."""
+ if _USING_STRINGBUILDER:
+ return memoryview(self.bytes())
+ else:
+ return self._buffer.getbuffer()
diff --git a/msgpack/pack.h b/msgpack/pack.h
index 15399914..edf3a3fe 100644
--- a/msgpack/pack.h
+++ b/msgpack/pack.h
@@ -21,55 +21,21 @@
#include "sysdep.h"
#include
#include
+#include
#ifdef __cplusplus
extern "C" {
#endif
-#ifdef _MSC_VER
-#define inline __inline
-#endif
-
typedef struct msgpack_packer {
char *buf;
size_t length;
size_t buf_size;
+ bool use_bin_type;
} msgpack_packer;
typedef struct Packer Packer;
-static inline int msgpack_pack_short(msgpack_packer* pk, short d);
-static inline int msgpack_pack_int(msgpack_packer* pk, int d);
-static inline int msgpack_pack_long(msgpack_packer* pk, long d);
-static inline int msgpack_pack_long_long(msgpack_packer* pk, long long d);
-static inline int msgpack_pack_unsigned_short(msgpack_packer* pk, unsigned short d);
-static inline int msgpack_pack_unsigned_int(msgpack_packer* pk, unsigned int d);
-static inline int msgpack_pack_unsigned_long(msgpack_packer* pk, unsigned long d);
-//static inline int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d);
-
-static inline int msgpack_pack_uint8(msgpack_packer* pk, uint8_t d);
-static inline int msgpack_pack_uint16(msgpack_packer* pk, uint16_t d);
-static inline int msgpack_pack_uint32(msgpack_packer* pk, uint32_t d);
-static inline int msgpack_pack_uint64(msgpack_packer* pk, uint64_t d);
-static inline int msgpack_pack_int8(msgpack_packer* pk, int8_t d);
-static inline int msgpack_pack_int16(msgpack_packer* pk, int16_t d);
-static inline int msgpack_pack_int32(msgpack_packer* pk, int32_t d);
-static inline int msgpack_pack_int64(msgpack_packer* pk, int64_t d);
-
-static inline int msgpack_pack_float(msgpack_packer* pk, float d);
-static inline int msgpack_pack_double(msgpack_packer* pk, double d);
-
-static inline int msgpack_pack_nil(msgpack_packer* pk);
-static inline int msgpack_pack_true(msgpack_packer* pk);
-static inline int msgpack_pack_false(msgpack_packer* pk);
-
-static inline int msgpack_pack_array(msgpack_packer* pk, unsigned int n);
-
-static inline int msgpack_pack_map(msgpack_packer* pk, unsigned int n);
-
-static inline int msgpack_pack_raw(msgpack_packer* pk, size_t l);
-static inline int msgpack_pack_raw_body(msgpack_packer* pk, const void* b, size_t l);
-
static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_t l)
{
char* buf = pk->buf;
@@ -78,8 +44,11 @@ static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_
if (len + l > bs) {
bs = (len + l) * 2;
- buf = (char*)realloc(buf, bs);
- if (!buf) return -1;
+ buf = (char*)PyMem_Realloc(buf, bs);
+ if (!buf) {
+ PyErr_NoMemory();
+ return -1;
+ }
}
memcpy(buf + len, data, l);
len += l;
diff --git a/msgpack/pack_template.h b/msgpack/pack_template.h
index 9e00d7e8..b8959f02 100644
--- a/msgpack/pack_template.h
+++ b/msgpack/pack_template.h
@@ -37,18 +37,6 @@
* Integer
*/
-#define msgpack_pack_real_uint8(x, d) \
-do { \
- if(d < (1<<7)) { \
- /* fixnum */ \
- msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \
- } else { \
- /* unsigned 8 */ \
- unsigned char buf[2] = {0xcc, TAKE8_8(d)}; \
- msgpack_pack_append_buffer(x, buf, 2); \
- } \
-} while(0)
-
#define msgpack_pack_real_uint16(x, d) \
do { \
if(d < (1<<7)) { \
@@ -123,18 +111,6 @@ do { \
} \
} while(0)
-#define msgpack_pack_real_int8(x, d) \
-do { \
- if(d < -(1<<5)) { \
- /* signed 8 */ \
- unsigned char buf[2] = {0xd0, TAKE8_8(d)}; \
- msgpack_pack_append_buffer(x, buf, 2); \
- } else { \
- /* fixnum */ \
- msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \
- } \
-} while(0)
-
#define msgpack_pack_real_int16(x, d) \
do { \
if(d < -(1<<5)) { \
@@ -264,49 +240,6 @@ do { \
} while(0)
-static inline int msgpack_pack_uint8(msgpack_packer* x, uint8_t d)
-{
- msgpack_pack_real_uint8(x, d);
-}
-
-static inline int msgpack_pack_uint16(msgpack_packer* x, uint16_t d)
-{
- msgpack_pack_real_uint16(x, d);
-}
-
-static inline int msgpack_pack_uint32(msgpack_packer* x, uint32_t d)
-{
- msgpack_pack_real_uint32(x, d);
-}
-
-static inline int msgpack_pack_uint64(msgpack_packer* x, uint64_t d)
-{
- msgpack_pack_real_uint64(x, d);
-}
-
-static inline int msgpack_pack_int8(msgpack_packer* x, int8_t d)
-{
- msgpack_pack_real_int8(x, d);
-}
-
-static inline int msgpack_pack_int16(msgpack_packer* x, int16_t d)
-{
- msgpack_pack_real_int16(x, d);
-}
-
-static inline int msgpack_pack_int32(msgpack_packer* x, int32_t d)
-{
- msgpack_pack_real_int32(x, d);
-}
-
-static inline int msgpack_pack_int64(msgpack_packer* x, int64_t d)
-{
- msgpack_pack_real_int64(x, d);
-}
-
-
-//#ifdef msgpack_pack_inline_func_cint
-
static inline int msgpack_pack_short(msgpack_packer* x, short d)
{
#if defined(SIZEOF_SHORT)
@@ -372,193 +305,38 @@ if(sizeof(int) == 2) {
static inline int msgpack_pack_long(msgpack_packer* x, long d)
{
#if defined(SIZEOF_LONG)
-#if SIZEOF_LONG == 2
- msgpack_pack_real_int16(x, d);
-#elif SIZEOF_LONG == 4
+#if SIZEOF_LONG == 4
msgpack_pack_real_int32(x, d);
#else
msgpack_pack_real_int64(x, d);
#endif
#elif defined(LONG_MAX)
-#if LONG_MAX == 0x7fffL
- msgpack_pack_real_int16(x, d);
-#elif LONG_MAX == 0x7fffffffL
+#if LONG_MAX == 0x7fffffffL
msgpack_pack_real_int32(x, d);
#else
msgpack_pack_real_int64(x, d);
#endif
#else
-if(sizeof(long) == 2) {
- msgpack_pack_real_int16(x, d);
-} else if(sizeof(long) == 4) {
- msgpack_pack_real_int32(x, d);
-} else {
- msgpack_pack_real_int64(x, d);
-}
+ if (sizeof(long) == 4) {
+ msgpack_pack_real_int32(x, d);
+ } else {
+ msgpack_pack_real_int64(x, d);
+ }
#endif
}
static inline int msgpack_pack_long_long(msgpack_packer* x, long long d)
{
-#if defined(SIZEOF_LONG_LONG)
-#if SIZEOF_LONG_LONG == 2
- msgpack_pack_real_int16(x, d);
-#elif SIZEOF_LONG_LONG == 4
- msgpack_pack_real_int32(x, d);
-#else
msgpack_pack_real_int64(x, d);
-#endif
-
-#elif defined(LLONG_MAX)
-#if LLONG_MAX == 0x7fffL
- msgpack_pack_real_int16(x, d);
-#elif LLONG_MAX == 0x7fffffffL
- msgpack_pack_real_int32(x, d);
-#else
- msgpack_pack_real_int64(x, d);
-#endif
-
-#else
-if(sizeof(long long) == 2) {
- msgpack_pack_real_int16(x, d);
-} else if(sizeof(long long) == 4) {
- msgpack_pack_real_int32(x, d);
-} else {
- msgpack_pack_real_int64(x, d);
-}
-#endif
-}
-
-static inline int msgpack_pack_unsigned_short(msgpack_packer* x, unsigned short d)
-{
-#if defined(SIZEOF_SHORT)
-#if SIZEOF_SHORT == 2
- msgpack_pack_real_uint16(x, d);
-#elif SIZEOF_SHORT == 4
- msgpack_pack_real_uint32(x, d);
-#else
- msgpack_pack_real_uint64(x, d);
-#endif
-
-#elif defined(USHRT_MAX)
-#if USHRT_MAX == 0xffffU
- msgpack_pack_real_uint16(x, d);
-#elif USHRT_MAX == 0xffffffffU
- msgpack_pack_real_uint32(x, d);
-#else
- msgpack_pack_real_uint64(x, d);
-#endif
-
-#else
-if(sizeof(unsigned short) == 2) {
- msgpack_pack_real_uint16(x, d);
-} else if(sizeof(unsigned short) == 4) {
- msgpack_pack_real_uint32(x, d);
-} else {
- msgpack_pack_real_uint64(x, d);
-}
-#endif
-}
-
-static inline int msgpack_pack_unsigned_int(msgpack_packer* x, unsigned int d)
-{
-#if defined(SIZEOF_INT)
-#if SIZEOF_INT == 2
- msgpack_pack_real_uint16(x, d);
-#elif SIZEOF_INT == 4
- msgpack_pack_real_uint32(x, d);
-#else
- msgpack_pack_real_uint64(x, d);
-#endif
-
-#elif defined(UINT_MAX)
-#if UINT_MAX == 0xffffU
- msgpack_pack_real_uint16(x, d);
-#elif UINT_MAX == 0xffffffffU
- msgpack_pack_real_uint32(x, d);
-#else
- msgpack_pack_real_uint64(x, d);
-#endif
-
-#else
-if(sizeof(unsigned int) == 2) {
- msgpack_pack_real_uint16(x, d);
-} else if(sizeof(unsigned int) == 4) {
- msgpack_pack_real_uint32(x, d);
-} else {
- msgpack_pack_real_uint64(x, d);
-}
-#endif
-}
-
-static inline int msgpack_pack_unsigned_long(msgpack_packer* x, unsigned long d)
-{
-#if defined(SIZEOF_LONG)
-#if SIZEOF_LONG == 2
- msgpack_pack_real_uint16(x, d);
-#elif SIZEOF_LONG == 4
- msgpack_pack_real_uint32(x, d);
-#else
- msgpack_pack_real_uint64(x, d);
-#endif
-
-#elif defined(ULONG_MAX)
-#if ULONG_MAX == 0xffffUL
- msgpack_pack_real_uint16(x, d);
-#elif ULONG_MAX == 0xffffffffUL
- msgpack_pack_real_uint32(x, d);
-#else
- msgpack_pack_real_uint64(x, d);
-#endif
-
-#else
-if(sizeof(unsigned long) == 2) {
- msgpack_pack_real_uint16(x, d);
-} else if(sizeof(unsigned long) == 4) {
- msgpack_pack_real_uint32(x, d);
-} else {
- msgpack_pack_real_uint64(x, d);
-}
-#endif
}
static inline int msgpack_pack_unsigned_long_long(msgpack_packer* x, unsigned long long d)
{
-#if defined(SIZEOF_LONG_LONG)
-#if SIZEOF_LONG_LONG == 2
- msgpack_pack_real_uint16(x, d);
-#elif SIZEOF_LONG_LONG == 4
- msgpack_pack_real_uint32(x, d);
-#else
msgpack_pack_real_uint64(x, d);
-#endif
-
-#elif defined(ULLONG_MAX)
-#if ULLONG_MAX == 0xffffUL
- msgpack_pack_real_uint16(x, d);
-#elif ULLONG_MAX == 0xffffffffUL
- msgpack_pack_real_uint32(x, d);
-#else
- msgpack_pack_real_uint64(x, d);
-#endif
-
-#else
-if(sizeof(unsigned long long) == 2) {
- msgpack_pack_real_uint16(x, d);
-} else if(sizeof(unsigned long long) == 4) {
- msgpack_pack_real_uint32(x, d);
-} else {
- msgpack_pack_real_uint64(x, d);
-}
-#endif
}
-//#undef msgpack_pack_inline_func_cint
-//#endif
-
-
/*
* Float
@@ -566,24 +344,26 @@ if(sizeof(unsigned long long) == 2) {
static inline int msgpack_pack_float(msgpack_packer* x, float d)
{
- union { float f; uint32_t i; } mem;
- mem.f = d;
unsigned char buf[5];
- buf[0] = 0xca; _msgpack_store32(&buf[1], mem.i);
+ buf[0] = 0xca;
+
+#if PY_VERSION_HEX >= 0x030B00A7
+ PyFloat_Pack4(d, (char *)&buf[1], 0);
+#else
+ _PyFloat_Pack4(d, &buf[1], 0);
+#endif
msgpack_pack_append_buffer(x, buf, 5);
}
static inline int msgpack_pack_double(msgpack_packer* x, double d)
{
- union { double f; uint64_t i; } mem;
- mem.f = d;
unsigned char buf[9];
buf[0] = 0xcb;
-#if defined(__arm__) && !(__ARM_EABI__) // arm-oabi
- // https://github.com/msgpack/msgpack-perl/pull/1
- mem.i = (mem.i & 0xFFFFFFFFUL) << 32UL | (mem.i >> 32UL);
+#if PY_VERSION_HEX >= 0x030B00A7
+ PyFloat_Pack8(d, (char *)&buf[1], 0);
+#else
+ _PyFloat_Pack8(d, &buf[1], 0);
#endif
- _msgpack_store64(&buf[1], mem.i);
msgpack_pack_append_buffer(x, buf, 9);
}
@@ -664,10 +444,13 @@ static inline int msgpack_pack_map(msgpack_packer* x, unsigned int n)
static inline int msgpack_pack_raw(msgpack_packer* x, size_t l)
{
- if(l < 32) {
+ if (l < 32) {
unsigned char d = 0xa0 | (uint8_t)l;
msgpack_pack_append_buffer(x, &TAKE8_8(d), 1);
- } else if(l < 65536) {
+ } else if (x->use_bin_type && l < 256) { // str8 is new format introduced with bin.
+ unsigned char buf[2] = {0xd9, (uint8_t)l};
+ msgpack_pack_append_buffer(x, buf, 2);
+ } else if (l < 65536) {
unsigned char buf[3];
buf[0] = 0xda; _msgpack_store16(&buf[1], (uint16_t)l);
msgpack_pack_append_buffer(x, buf, 3);
@@ -678,11 +461,126 @@ static inline int msgpack_pack_raw(msgpack_packer* x, size_t l)
}
}
+/*
+ * bin
+ */
+static inline int msgpack_pack_bin(msgpack_packer *x, size_t l)
+{
+ if (!x->use_bin_type) {
+ return msgpack_pack_raw(x, l);
+ }
+ if (l < 256) {
+ unsigned char buf[2] = {0xc4, (unsigned char)l};
+ msgpack_pack_append_buffer(x, buf, 2);
+ } else if (l < 65536) {
+ unsigned char buf[3] = {0xc5};
+ _msgpack_store16(&buf[1], (uint16_t)l);
+ msgpack_pack_append_buffer(x, buf, 3);
+ } else {
+ unsigned char buf[5] = {0xc6};
+ _msgpack_store32(&buf[1], (uint32_t)l);
+ msgpack_pack_append_buffer(x, buf, 5);
+ }
+}
+
static inline int msgpack_pack_raw_body(msgpack_packer* x, const void* b, size_t l)
{
- msgpack_pack_append_buffer(x, (const unsigned char*)b, l);
+ if (l > 0) msgpack_pack_append_buffer(x, (const unsigned char*)b, l);
+ return 0;
}
+/*
+ * Ext
+ */
+static inline int msgpack_pack_ext(msgpack_packer* x, char typecode, size_t l)
+{
+ if (l == 1) {
+ unsigned char buf[2];
+ buf[0] = 0xd4;
+ buf[1] = (unsigned char)typecode;
+ msgpack_pack_append_buffer(x, buf, 2);
+ }
+ else if(l == 2) {
+ unsigned char buf[2];
+ buf[0] = 0xd5;
+ buf[1] = (unsigned char)typecode;
+ msgpack_pack_append_buffer(x, buf, 2);
+ }
+ else if(l == 4) {
+ unsigned char buf[2];
+ buf[0] = 0xd6;
+ buf[1] = (unsigned char)typecode;
+ msgpack_pack_append_buffer(x, buf, 2);
+ }
+ else if(l == 8) {
+ unsigned char buf[2];
+ buf[0] = 0xd7;
+ buf[1] = (unsigned char)typecode;
+ msgpack_pack_append_buffer(x, buf, 2);
+ }
+ else if(l == 16) {
+ unsigned char buf[2];
+ buf[0] = 0xd8;
+ buf[1] = (unsigned char)typecode;
+ msgpack_pack_append_buffer(x, buf, 2);
+ }
+ else if(l < 256) {
+ unsigned char buf[3];
+ buf[0] = 0xc7;
+ buf[1] = l;
+ buf[2] = (unsigned char)typecode;
+ msgpack_pack_append_buffer(x, buf, 3);
+ } else if(l < 65536) {
+ unsigned char buf[4];
+ buf[0] = 0xc8;
+ _msgpack_store16(&buf[1], (uint16_t)l);
+ buf[3] = (unsigned char)typecode;
+ msgpack_pack_append_buffer(x, buf, 4);
+ } else {
+ unsigned char buf[6];
+ buf[0] = 0xc9;
+ _msgpack_store32(&buf[1], (uint32_t)l);
+ buf[5] = (unsigned char)typecode;
+ msgpack_pack_append_buffer(x, buf, 6);
+ }
+
+}
+
+/*
+ * Pack Timestamp extension type. Follows msgpack-c pack_template.h.
+ */
+static inline int msgpack_pack_timestamp(msgpack_packer* x, int64_t seconds, uint32_t nanoseconds)
+{
+ if ((seconds >> 34) == 0) {
+ /* seconds is unsigned and fits in 34 bits */
+ uint64_t data64 = ((uint64_t)nanoseconds << 34) | (uint64_t)seconds;
+ if ((data64 & 0xffffffff00000000L) == 0) {
+ /* no nanoseconds and seconds is 32bits or smaller. timestamp32. */
+ unsigned char buf[4];
+ uint32_t data32 = (uint32_t)data64;
+ msgpack_pack_ext(x, -1, 4);
+ _msgpack_store32(buf, data32);
+ msgpack_pack_raw_body(x, buf, 4);
+ } else {
+ /* timestamp64 */
+ unsigned char buf[8];
+ msgpack_pack_ext(x, -1, 8);
+ _msgpack_store64(buf, data64);
+ msgpack_pack_raw_body(x, buf, 8);
+
+ }
+ } else {
+ /* seconds is signed or >34bits */
+ unsigned char buf[12];
+ _msgpack_store32(&buf[0], nanoseconds);
+ _msgpack_store64(&buf[4], seconds);
+ msgpack_pack_ext(x, -1, 12);
+ msgpack_pack_raw_body(x, buf, 12);
+ }
+ return 0;
+}
+
+
#undef msgpack_pack_append_buffer
#undef TAKE8_8
@@ -690,11 +588,9 @@ static inline int msgpack_pack_raw_body(msgpack_packer* x, const void* b, size_t
#undef TAKE8_32
#undef TAKE8_64
-#undef msgpack_pack_real_uint8
#undef msgpack_pack_real_uint16
#undef msgpack_pack_real_uint32
#undef msgpack_pack_real_uint64
-#undef msgpack_pack_real_int8
#undef msgpack_pack_real_int16
#undef msgpack_pack_real_int32
#undef msgpack_pack_real_int64
diff --git a/msgpack/sysdep.h b/msgpack/sysdep.h
index ed9c1bc0..70673004 100644
--- a/msgpack/sysdep.h
+++ b/msgpack/sysdep.h
@@ -61,14 +61,14 @@ typedef unsigned int _msgpack_atomic_counter_t;
#endif
#endif
-#else
-#include /* __BYTE_ORDER */
+#else /* _WIN32 */
+#include /* ntohs, ntohl */
#endif
#if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define __LITTLE_ENDIAN__
-#elif __BYTE_ORDER == __BIG_ENDIAN
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define __BIG_ENDIAN__
#elif _WIN32
#define __LITTLE_ENDIAN__
@@ -95,7 +95,7 @@ typedef unsigned int _msgpack_atomic_counter_t;
#ifdef _WIN32
# if defined(ntohl)
# define _msgpack_be32(x) ntohl(x)
-# elif defined(_byteswap_ulong) || (defined(_MSC_VER) && _MSC_VER >= 1400)
+# elif defined(_byteswap_ulong) || defined(_MSC_VER)
# define _msgpack_be32(x) ((uint32_t)_byteswap_ulong((unsigned long)x))
# else
# define _msgpack_be32(x) \
@@ -108,7 +108,7 @@ typedef unsigned int _msgpack_atomic_counter_t;
# define _msgpack_be32(x) ntohl(x)
#endif
-#if defined(_byteswap_uint64) || (defined(_MSC_VER) && _MSC_VER >= 1400)
+#if defined(_byteswap_uint64) || defined(_MSC_VER)
# define _msgpack_be64(x) (_byteswap_uint64(x))
#elif defined(bswap_64)
# define _msgpack_be64(x) bswap_64(x)
diff --git a/msgpack/unpack.h b/msgpack/unpack.h
index 595b8df6..58a2f4f5 100644
--- a/msgpack/unpack.h
+++ b/msgpack/unpack.h
@@ -20,18 +20,25 @@
#include "unpack_define.h"
typedef struct unpack_user {
- int use_list;
- PyObject *object_hook;
+ bool use_list;
+ bool raw;
bool has_pairs_hook;
+ bool strict_map_key;
+ int timestamp;
+ PyObject *object_hook;
PyObject *list_hook;
- const char *encoding;
+ PyObject *ext_hook;
+ PyObject *timestamp_t;
+ PyObject *giga;
+ PyObject *utc;
const char *unicode_errors;
+ Py_ssize_t max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len;
} unpack_user;
typedef PyObject* msgpack_unpack_object;
struct unpack_context;
typedef struct unpack_context unpack_context;
-typedef int (*execute_fn)(unpack_context *ctx, const char* data, size_t len, size_t* off);
+typedef int (*execute_fn)(unpack_context *ctx, const char* data, Py_ssize_t len, Py_ssize_t* off);
static inline msgpack_unpack_object unpack_callback_root(unpack_user* u)
{
@@ -40,7 +47,7 @@ static inline msgpack_unpack_object unpack_callback_root(unpack_user* u)
static inline int unpack_callback_uint16(unpack_user* u, uint16_t d, msgpack_unpack_object* o)
{
- PyObject *p = PyInt_FromLong((long)d);
+ PyObject *p = PyLong_FromLong((long)d);
if (!p)
return -1;
*o = p;
@@ -54,12 +61,7 @@ static inline int unpack_callback_uint8(unpack_user* u, uint8_t d, msgpack_unpac
static inline int unpack_callback_uint32(unpack_user* u, uint32_t d, msgpack_unpack_object* o)
{
- PyObject *p;
- if (d > LONG_MAX) {
- p = PyLong_FromUnsignedLong((unsigned long)d);
- } else {
- p = PyInt_FromLong((long)d);
- }
+ PyObject *p = PyLong_FromSize_t((size_t)d);
if (!p)
return -1;
*o = p;
@@ -68,7 +70,12 @@ static inline int unpack_callback_uint32(unpack_user* u, uint32_t d, msgpack_unp
static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, msgpack_unpack_object* o)
{
- PyObject *p = PyLong_FromUnsignedLongLong(d);
+ PyObject *p;
+ if (d > LONG_MAX) {
+ p = PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG)d);
+ } else {
+ p = PyLong_FromLong((long)d);
+ }
if (!p)
return -1;
*o = p;
@@ -77,7 +84,7 @@ static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, msgpack_unp
static inline int unpack_callback_int32(unpack_user* u, int32_t d, msgpack_unpack_object* o)
{
- PyObject *p = PyInt_FromLong(d);
+ PyObject *p = PyLong_FromLong(d);
if (!p)
return -1;
*o = p;
@@ -96,9 +103,12 @@ static inline int unpack_callback_int8(unpack_user* u, int8_t d, msgpack_unpack_
static inline int unpack_callback_int64(unpack_user* u, int64_t d, msgpack_unpack_object* o)
{
- PyObject *p = PyLong_FromLongLong(d);
- if (!p)
- return -1;
+ PyObject *p;
+ if (d > LONG_MAX || d < LONG_MIN) {
+ p = PyLong_FromLongLong((PY_LONG_LONG)d);
+ } else {
+ p = PyLong_FromLong((long)d);
+ }
*o = p;
return 0;
}
@@ -128,6 +138,10 @@ static inline int unpack_callback_false(unpack_user* u, msgpack_unpack_object* o
static inline int unpack_callback_array(unpack_user* u, unsigned int n, msgpack_unpack_object* o)
{
+ if (n > u->max_array_len) {
+ PyErr_Format(PyExc_ValueError, "%u exceeds max_array_len(%zd)", n, u->max_array_len);
+ return -1;
+ }
PyObject *p = u->use_list ? PyList_New(n) : PyTuple_New(n);
if (!p)
@@ -148,7 +162,7 @@ static inline int unpack_callback_array_item(unpack_user* u, unsigned int curren
static inline int unpack_callback_array_end(unpack_user* u, msgpack_unpack_object* c)
{
if (u->list_hook) {
- PyObject *new_c = PyEval_CallFunction(u->list_hook, "(O)", *c);
+ PyObject *new_c = PyObject_CallFunctionObjArgs(u->list_hook, *c, NULL);
if (!new_c)
return -1;
Py_DECREF(*c);
@@ -159,6 +173,10 @@ static inline int unpack_callback_array_end(unpack_user* u, msgpack_unpack_objec
static inline int unpack_callback_map(unpack_user* u, unsigned int n, msgpack_unpack_object* o)
{
+ if (n > u->max_map_len) {
+ PyErr_Format(PyExc_ValueError, "%u exceeds max_map_len(%zd)", n, u->max_map_len);
+ return -1;
+ }
PyObject *p;
if (u->has_pairs_hook) {
p = PyList_New(n); // Or use tuple?
@@ -174,6 +192,13 @@ static inline int unpack_callback_map(unpack_user* u, unsigned int n, msgpack_un
static inline int unpack_callback_map_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object k, msgpack_unpack_object v)
{
+ if (u->strict_map_key && !PyUnicode_CheckExact(k) && !PyBytes_CheckExact(k)) {
+ PyErr_Format(PyExc_ValueError, "%.100s is not allowed for map key when strict_map_key=True", Py_TYPE(k)->tp_name);
+ return -1;
+ }
+ if (PyUnicode_CheckExact(k)) {
+ PyUnicode_InternInPlace(&k);
+ }
if (u->has_pairs_hook) {
msgpack_unpack_object item = PyTuple_Pack(2, k, v);
if (!item)
@@ -194,7 +219,7 @@ static inline int unpack_callback_map_item(unpack_user* u, unsigned int current,
static inline int unpack_callback_map_end(unpack_user* u, msgpack_unpack_object* c)
{
if (u->object_hook) {
- PyObject *new_c = PyEval_CallFunction(u->object_hook, "(O)", *c);
+ PyObject *new_c = PyObject_CallFunctionObjArgs(u->object_hook, *c, NULL);
if (!new_c)
return -1;
@@ -206,11 +231,156 @@ static inline int unpack_callback_map_end(unpack_user* u, msgpack_unpack_object*
static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* p, unsigned int l, msgpack_unpack_object* o)
{
+ if (l > u->max_str_len) {
+ PyErr_Format(PyExc_ValueError, "%u exceeds max_str_len(%zd)", l, u->max_str_len);
+ return -1;
+ }
+
PyObject *py;
- if(u->encoding) {
- py = PyUnicode_Decode(p, l, u->encoding, u->unicode_errors);
- } else {
+
+ if (u->raw) {
py = PyBytes_FromStringAndSize(p, l);
+ } else {
+ py = PyUnicode_DecodeUTF8(p, l, u->unicode_errors);
+ }
+ if (!py)
+ return -1;
+ *o = py;
+ return 0;
+}
+
+static inline int unpack_callback_bin(unpack_user* u, const char* b, const char* p, unsigned int l, msgpack_unpack_object* o)
+{
+ if (l > u->max_bin_len) {
+ PyErr_Format(PyExc_ValueError, "%u exceeds max_bin_len(%zd)", l, u->max_bin_len);
+ return -1;
+ }
+
+ PyObject *py = PyBytes_FromStringAndSize(p, l);
+ if (!py)
+ return -1;
+ *o = py;
+ return 0;
+}
+
+typedef struct msgpack_timestamp {
+ int64_t tv_sec;
+ uint32_t tv_nsec;
+} msgpack_timestamp;
+
+/*
+ * Unpack ext buffer to a timestamp. Pulled from msgpack-c timestamp.h.
+ */
+static int unpack_timestamp(const char* buf, unsigned int buflen, msgpack_timestamp* ts) {
+ switch (buflen) {
+ case 4:
+ ts->tv_nsec = 0;
+ {
+ uint32_t v = _msgpack_load32(uint32_t, buf);
+ ts->tv_sec = (int64_t)v;
+ }
+ return 0;
+ case 8: {
+ uint64_t value =_msgpack_load64(uint64_t, buf);
+ ts->tv_nsec = (uint32_t)(value >> 34);
+ ts->tv_sec = value & 0x00000003ffffffffLL;
+ return 0;
+ }
+ case 12:
+ ts->tv_nsec = _msgpack_load32(uint32_t, buf);
+ ts->tv_sec = _msgpack_load64(int64_t, buf + 4);
+ return 0;
+ default:
+ return -1;
+ }
+}
+
+#include "datetime.h"
+
+static int unpack_callback_ext(unpack_user* u, const char* base, const char* pos,
+ unsigned int length, msgpack_unpack_object* o)
+{
+ int8_t typecode = (int8_t)*pos++;
+ if (!u->ext_hook) {
+ PyErr_SetString(PyExc_AssertionError, "u->ext_hook cannot be NULL");
+ return -1;
+ }
+ if (length-1 > u->max_ext_len) {
+ PyErr_Format(PyExc_ValueError, "%u exceeds max_ext_len(%zd)", length, u->max_ext_len);
+ return -1;
+ }
+
+ PyObject *py = NULL;
+ // length also includes the typecode, so the actual data is length-1
+ if (typecode == -1) {
+ msgpack_timestamp ts;
+ if (unpack_timestamp(pos, length-1, &ts) < 0) {
+ return -1;
+ }
+
+ if (u->timestamp == 2) { // int
+ PyObject *a = PyLong_FromLongLong(ts.tv_sec);
+ if (a == NULL) return -1;
+
+ PyObject *c = PyNumber_Multiply(a, u->giga);
+ Py_DECREF(a);
+ if (c == NULL) {
+ return -1;
+ }
+
+ PyObject *b = PyLong_FromUnsignedLong(ts.tv_nsec);
+ if (b == NULL) {
+ Py_DECREF(c);
+ return -1;
+ }
+
+ py = PyNumber_Add(c, b);
+ Py_DECREF(c);
+ Py_DECREF(b);
+ }
+ else if (u->timestamp == 0) { // Timestamp
+ py = PyObject_CallFunction(u->timestamp_t, "(Lk)", ts.tv_sec, ts.tv_nsec);
+ }
+ else if (u->timestamp == 3) { // datetime
+ // Calculate datetime using epoch + delta
+ // due to limitations PyDateTime_FromTimestamp on Windows with negative timestamps
+ PyObject *epoch = PyDateTimeAPI->DateTime_FromDateAndTime(1970, 1, 1, 0, 0, 0, 0, u->utc, PyDateTimeAPI->DateTimeType);
+ if (epoch == NULL) {
+ return -1;
+ }
+
+ PyObject* d = PyDelta_FromDSU(ts.tv_sec/(24*3600), ts.tv_sec%(24*3600), ts.tv_nsec / 1000);
+ if (d == NULL) {
+ Py_DECREF(epoch);
+ return -1;
+ }
+
+ py = PyNumber_Add(epoch, d);
+
+ Py_DECREF(epoch);
+ Py_DECREF(d);
+ }
+ else { // float
+ PyObject *a = PyFloat_FromDouble((double)ts.tv_nsec);
+ if (a == NULL) return -1;
+
+ PyObject *b = PyNumber_TrueDivide(a, u->giga);
+ Py_DECREF(a);
+ if (b == NULL) return -1;
+
+ PyObject *c = PyLong_FromLongLong(ts.tv_sec);
+ if (c == NULL) {
+ Py_DECREF(b);
+ return -1;
+ }
+
+ a = PyNumber_Add(b, c);
+ Py_DECREF(b);
+ Py_DECREF(c);
+ py = a;
+ }
+ } else {
+ py = PyObject_CallFunction(u->ext_hook, "(iy#)", (int)typecode, pos, (Py_ssize_t)length-1);
}
if (!py)
return -1;
diff --git a/msgpack/unpack_container_header.h b/msgpack/unpack_container_header.h
new file mode 100644
index 00000000..c14a3c2b
--- /dev/null
+++ b/msgpack/unpack_container_header.h
@@ -0,0 +1,51 @@
+static inline int unpack_container_header(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off)
+{
+ assert(len >= *off);
+ uint32_t size;
+ const unsigned char *const p = (unsigned char*)data + *off;
+
+#define inc_offset(inc) \
+ if (len - *off < inc) \
+ return 0; \
+ *off += inc;
+
+ switch (*p) {
+ case var_offset:
+ inc_offset(3);
+ size = _msgpack_load16(uint16_t, p + 1);
+ break;
+ case var_offset + 1:
+ inc_offset(5);
+ size = _msgpack_load32(uint32_t, p + 1);
+ break;
+#ifdef USE_CASE_RANGE
+ case fixed_offset + 0x0 ... fixed_offset + 0xf:
+#else
+ case fixed_offset + 0x0:
+ case fixed_offset + 0x1:
+ case fixed_offset + 0x2:
+ case fixed_offset + 0x3:
+ case fixed_offset + 0x4:
+ case fixed_offset + 0x5:
+ case fixed_offset + 0x6:
+ case fixed_offset + 0x7:
+ case fixed_offset + 0x8:
+ case fixed_offset + 0x9:
+ case fixed_offset + 0xa:
+ case fixed_offset + 0xb:
+ case fixed_offset + 0xc:
+ case fixed_offset + 0xd:
+ case fixed_offset + 0xe:
+ case fixed_offset + 0xf:
+#endif
+ ++*off;
+ size = ((unsigned int)*p) & 0x0f;
+ break;
+ default:
+ PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream");
+ return -1;
+ }
+ unpack_callback_uint32(&ctx->user, size, &ctx->stack[0].obj);
+ return 1;
+}
+
diff --git a/msgpack/unpack_define.h b/msgpack/unpack_define.h
index c81b990c..0dd708d1 100644
--- a/msgpack/unpack_define.h
+++ b/msgpack/unpack_define.h
@@ -34,6 +34,7 @@ extern "C" {
#endif
+// CS is first byte & 0x1f
typedef enum {
CS_HEADER = 0x00, // nil
@@ -41,13 +42,14 @@ typedef enum {
//CS_ = 0x02, // false
//CS_ = 0x03, // true
- //CS_ = 0x04,
- //CS_ = 0x05,
- //CS_ = 0x06,
- //CS_ = 0x07,
+ CS_BIN_8 = 0x04,
+ CS_BIN_16 = 0x05,
+ CS_BIN_32 = 0x06,
+
+ CS_EXT_8 = 0x07,
+ CS_EXT_16 = 0x08,
+ CS_EXT_32 = 0x09,
- //CS_ = 0x08,
- //CS_ = 0x09,
CS_FLOAT = 0x0a,
CS_DOUBLE = 0x0b,
CS_UINT_8 = 0x0c,
@@ -59,12 +61,13 @@ typedef enum {
CS_INT_32 = 0x12,
CS_INT_64 = 0x13,
- //CS_ = 0x14,
- //CS_ = 0x15,
- //CS_BIG_INT_16 = 0x16,
- //CS_BIG_INT_32 = 0x17,
- //CS_BIG_FLOAT_16 = 0x18,
- //CS_BIG_FLOAT_32 = 0x19,
+ //CS_FIXEXT1 = 0x14,
+ //CS_FIXEXT2 = 0x15,
+ //CS_FIXEXT4 = 0x16,
+ //CS_FIXEXT8 = 0x17,
+ //CS_FIXEXT16 = 0x18,
+
+ CS_RAW_8 = 0x19,
CS_RAW_16 = 0x1a,
CS_RAW_32 = 0x1b,
CS_ARRAY_16 = 0x1c,
@@ -72,9 +75,9 @@ typedef enum {
CS_MAP_16 = 0x1e,
CS_MAP_32 = 0x1f,
- //ACS_BIG_INT_VALUE,
- //ACS_BIG_FLOAT_VALUE,
ACS_RAW_VALUE,
+ ACS_BIN_VALUE,
+ ACS_EXT_VALUE,
} msgpack_unpack_state;
@@ -90,4 +93,3 @@ typedef enum {
#endif
#endif /* msgpack/unpack_define.h */
-
diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h
index 29ac935b..cce29e7a 100644
--- a/msgpack/unpack_template.h
+++ b/msgpack/unpack_template.h
@@ -24,8 +24,8 @@
typedef struct unpack_stack {
PyObject* obj;
- size_t size;
- size_t count;
+ Py_ssize_t size;
+ Py_ssize_t count;
unsigned int ct;
PyObject* map_key;
} unpack_stack;
@@ -70,15 +70,18 @@ static inline PyObject* unpack_data(unpack_context* ctx)
return (ctx)->stack[0].obj;
}
+static inline void unpack_clear(unpack_context *ctx)
+{
+ Py_CLEAR(ctx->stack[0].obj);
+}
-template
-static inline int unpack_execute(unpack_context* ctx, const char* data, size_t len, size_t* off)
+static inline int unpack_execute(bool construct, unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off)
{
assert(len >= *off);
const unsigned char* p = (unsigned char*)data + *off;
const unsigned char* const pe = (unsigned char*)data + len;
- const void* n = NULL;
+ const void* n = p;
unsigned int trail = ctx->trail;
unsigned int cs = ctx->cs;
@@ -89,7 +92,7 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
*/
unpack_user* user = &ctx->user;
- PyObject* obj;
+ PyObject* obj = NULL;
unpack_stack* c = NULL;
int ret;
@@ -119,7 +122,7 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
goto _fixed_trail_again
#define start_container(func, count_, ct_) \
- if(top >= MSGPACK_EMBED_STACK_SIZE) { goto _failed; } /* FIXME */ \
+ if(top >= MSGPACK_EMBED_STACK_SIZE) { ret = -3; goto _end; } \
if(construct_cb(func)(user, count_, &stack[top].obj) < 0) { goto _failed; } \
if((count_) == 0) { obj = stack[top].obj; \
if (construct_cb(func##_end)(user, &obj) < 0) { goto _failed; } \
@@ -128,31 +131,9 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
stack[top].size = count_; \
stack[top].count = 0; \
++top; \
- /*printf("container %d count %d stack %d\n",stack[top].obj,count_,top);*/ \
- /*printf("stack push %d\n", top);*/ \
- /* FIXME \
- if(top >= stack_size) { \
- if(stack_size == MSGPACK_EMBED_STACK_SIZE) { \
- size_t csize = sizeof(unpack_stack) * MSGPACK_EMBED_STACK_SIZE; \
- size_t nsize = csize * 2; \
- unpack_stack* tmp = (unpack_stack*)malloc(nsize); \
- if(tmp == NULL) { goto _failed; } \
- memcpy(tmp, ctx->stack, csize); \
- ctx->stack = stack = tmp; \
- ctx->stack_size = stack_size = MSGPACK_EMBED_STACK_SIZE * 2; \
- } else { \
- size_t nsize = sizeof(unpack_stack) * ctx->stack_size * 2; \
- unpack_stack* tmp = (unpack_stack*)realloc(ctx->stack, nsize); \
- if(tmp == NULL) { goto _failed; } \
- ctx->stack = stack = tmp; \
- ctx->stack_size = stack_size = stack_size * 2; \
- } \
- } \
- */ \
goto _header_again
-#define NEXT_CS(p) \
- ((unsigned int)*p & 0x1f)
+#define NEXT_CS(p) ((unsigned int)*p & 0x1f)
#ifdef USE_CASE_RANGE
#define SWITCH_RANGE_BEGIN switch(*p) {
@@ -179,18 +160,23 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
switch(*p) {
case 0xc0: // nil
push_simple_value(_nil);
- //case 0xc1: // string
- // again_terminal_trail(NEXT_CS(p), p+1);
+ //case 0xc1: // never used
case 0xc2: // false
push_simple_value(_false);
case 0xc3: // true
push_simple_value(_true);
- //case 0xc4:
- //case 0xc5:
- //case 0xc6:
- //case 0xc7:
- //case 0xc8:
- //case 0xc9:
+ case 0xc4: // bin 8
+ again_fixed_trail(NEXT_CS(p), 1);
+ case 0xc5: // bin 16
+ again_fixed_trail(NEXT_CS(p), 2);
+ case 0xc6: // bin 32
+ again_fixed_trail(NEXT_CS(p), 4);
+ case 0xc7: // ext 8
+ again_fixed_trail(NEXT_CS(p), 1);
+ case 0xc8: // ext 16
+ again_fixed_trail(NEXT_CS(p), 2);
+ case 0xc9: // ext 32
+ again_fixed_trail(NEXT_CS(p), 4);
case 0xca: // float
case 0xcb: // double
case 0xcc: // unsigned int 8
@@ -202,12 +188,17 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
case 0xd2: // signed int 32
case 0xd3: // signed int 64
again_fixed_trail(NEXT_CS(p), 1 << (((unsigned int)*p) & 0x03));
- //case 0xd4:
- //case 0xd5:
- //case 0xd6: // big integer 16
- //case 0xd7: // big integer 32
- //case 0xd8: // big float 16
- //case 0xd9: // big float 32
+ case 0xd4: // fixext 1
+ case 0xd5: // fixext 2
+ case 0xd6: // fixext 4
+ case 0xd7: // fixext 8
+ again_fixed_trail_if_zero(ACS_EXT_VALUE,
+ (1 << (((unsigned int)*p) & 0x03))+1,
+ _ext_zero);
+ case 0xd8: // fixext 16
+ again_fixed_trail_if_zero(ACS_EXT_VALUE, 16+1, _ext_zero);
+ case 0xd9: // str 8
+ again_fixed_trail(NEXT_CS(p), 1);
case 0xda: // raw 16
case 0xdb: // raw 32
case 0xdc: // array 16
@@ -216,7 +207,8 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
case 0xdf: // map 32
again_fixed_trail(NEXT_CS(p), 2 << (((unsigned int)*p) & 0x01));
default:
- goto _failed;
+ ret = -2;
+ goto _end;
}
SWITCH_RANGE(0xa0, 0xbf) // FixRaw
again_fixed_trail_if_zero(ACS_RAW_VALUE, ((unsigned int)*p & 0x1f), _raw_zero);
@@ -226,7 +218,8 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
start_container(_map, ((unsigned int)*p) & 0x0f, CT_MAP_KEY);
SWITCH_RANGE_DEFAULT
- goto _failed;
+ ret = -2;
+ goto _end;
SWITCH_RANGE_END
// end CS_HEADER
@@ -238,20 +231,32 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
if((size_t)(pe - p) < trail) { goto _out; }
n = p; p += trail - 1;
switch(cs) {
- //case CS_
- //case CS_
+ case CS_EXT_8:
+ again_fixed_trail_if_zero(ACS_EXT_VALUE, *(uint8_t*)n+1, _ext_zero);
+ case CS_EXT_16:
+ again_fixed_trail_if_zero(ACS_EXT_VALUE,
+ _msgpack_load16(uint16_t,n)+1,
+ _ext_zero);
+ case CS_EXT_32:
+ again_fixed_trail_if_zero(ACS_EXT_VALUE,
+ _msgpack_load32(uint32_t,n)+1,
+ _ext_zero);
case CS_FLOAT: {
- union { uint32_t i; float f; } mem;
- mem.i = _msgpack_load32(uint32_t,n);
- push_fixed_value(_float, mem.f); }
+ double f;
+#if PY_VERSION_HEX >= 0x030B00A7
+ f = PyFloat_Unpack4((const char*)n, 0);
+#else
+ f = _PyFloat_Unpack4((unsigned char*)n, 0);
+#endif
+ push_fixed_value(_float, f); }
case CS_DOUBLE: {
- union { uint64_t i; double f; } mem;
- mem.i = _msgpack_load64(uint64_t,n);
-#if defined(__arm__) && !(__ARM_EABI__) // arm-oabi
- // https://github.com/msgpack/msgpack-perl/pull/1
- mem.i = (mem.i & 0xFFFFFFFFUL) << 32UL | (mem.i >> 32UL);
+ double f;
+#if PY_VERSION_HEX >= 0x030B00A7
+ f = PyFloat_Unpack8((const char*)n, 0);
+#else
+ f = _PyFloat_Unpack8((unsigned char*)n, 0);
#endif
- push_fixed_value(_double, mem.f); }
+ push_fixed_value(_double, f); }
case CS_UINT_8:
push_fixed_value(_uint8, *(uint8_t*)n);
case CS_UINT_16:
@@ -270,26 +275,18 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
case CS_INT_64:
push_fixed_value(_int64, _msgpack_load64(int64_t,n));
- //case CS_
- //case CS_
- //case CS_BIG_INT_16:
- // again_fixed_trail_if_zero(ACS_BIG_INT_VALUE, _msgpack_load16(uint16_t,n), _big_int_zero);
- //case CS_BIG_INT_32:
- // again_fixed_trail_if_zero(ACS_BIG_INT_VALUE, _msgpack_load32(uint32_t,n), _big_int_zero);
- //case ACS_BIG_INT_VALUE:
- //_big_int_zero:
- // // FIXME
- // push_variable_value(_big_int, data, n, trail);
-
- //case CS_BIG_FLOAT_16:
- // again_fixed_trail_if_zero(ACS_BIG_FLOAT_VALUE, _msgpack_load16(uint16_t,n), _big_float_zero);
- //case CS_BIG_FLOAT_32:
- // again_fixed_trail_if_zero(ACS_BIG_FLOAT_VALUE, _msgpack_load32(uint32_t,n), _big_float_zero);
- //case ACS_BIG_FLOAT_VALUE:
- //_big_float_zero:
- // // FIXME
- // push_variable_value(_big_float, data, n, trail);
-
+ case CS_BIN_8:
+ again_fixed_trail_if_zero(ACS_BIN_VALUE, *(uint8_t*)n, _bin_zero);
+ case CS_BIN_16:
+ again_fixed_trail_if_zero(ACS_BIN_VALUE, _msgpack_load16(uint16_t,n), _bin_zero);
+ case CS_BIN_32:
+ again_fixed_trail_if_zero(ACS_BIN_VALUE, _msgpack_load32(uint32_t,n), _bin_zero);
+ case ACS_BIN_VALUE:
+ _bin_zero:
+ push_variable_value(_bin, data, n, trail);
+
+ case CS_RAW_8:
+ again_fixed_trail_if_zero(ACS_RAW_VALUE, *(uint8_t*)n, _raw_zero);
case CS_RAW_16:
again_fixed_trail_if_zero(ACS_RAW_VALUE, _msgpack_load16(uint16_t,n), _raw_zero);
case CS_RAW_32:
@@ -298,6 +295,10 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
_raw_zero:
push_variable_value(_raw, data, n, trail);
+ case ACS_EXT_VALUE:
+ _ext_zero:
+ push_variable_value(_ext, data, n, trail);
+
case CS_ARRAY_16:
start_container(_array, _msgpack_load16(uint16_t,n), CT_ARRAY_ITEM);
case CS_ARRAY_32:
@@ -384,6 +385,7 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
#undef construct_cb
}
+#undef NEXT_CS
#undef SWITCH_RANGE_BEGIN
#undef SWITCH_RANGE
#undef SWITCH_RANGE_DEFAULT
@@ -395,68 +397,27 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
#undef again_fixed_trail_if_zero
#undef start_container
-template
-static inline int unpack_container_header(unpack_context* ctx, const char* data, size_t len, size_t* off)
-{
- assert(len >= *off);
- uint32_t size;
- const unsigned char *const p = (unsigned char*)data + *off;
-
-#define inc_offset(inc) \
- if (len - *off < inc) \
- return 0; \
- *off += inc;
-
- switch (*p) {
- case var_offset:
- inc_offset(3);
- size = _msgpack_load16(uint16_t, p + 1);
- break;
- case var_offset + 1:
- inc_offset(5);
- size = _msgpack_load32(uint32_t, p + 1);
- break;
-#ifdef USE_CASE_RANGE
- case fixed_offset + 0x0 ... fixed_offset + 0xf:
-#else
- case fixed_offset + 0x0:
- case fixed_offset + 0x1:
- case fixed_offset + 0x2:
- case fixed_offset + 0x3:
- case fixed_offset + 0x4:
- case fixed_offset + 0x5:
- case fixed_offset + 0x6:
- case fixed_offset + 0x7:
- case fixed_offset + 0x8:
- case fixed_offset + 0x9:
- case fixed_offset + 0xa:
- case fixed_offset + 0xb:
- case fixed_offset + 0xc:
- case fixed_offset + 0xd:
- case fixed_offset + 0xe:
- case fixed_offset + 0xf:
-#endif
- ++*off;
- size = ((unsigned int)*p) & 0x0f;
- break;
- default:
- PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream");
- return -1;
- }
- unpack_callback_uint32(&ctx->user, size, &ctx->stack[0].obj);
- return 1;
+static int unpack_construct(unpack_context *ctx, const char *data, Py_ssize_t len, Py_ssize_t *off) {
+ return unpack_execute(1, ctx, data, len, off);
+}
+static int unpack_skip(unpack_context *ctx, const char *data, Py_ssize_t len, Py_ssize_t *off) {
+ return unpack_execute(0, ctx, data, len, off);
}
-#undef SWITCH_RANGE_BEGIN
-#undef SWITCH_RANGE
-#undef SWITCH_RANGE_DEFAULT
-#undef SWITCH_RANGE_END
-
-static const execute_fn unpack_construct = &unpack_execute;
-static const execute_fn unpack_skip = &unpack_execute;
-static const execute_fn read_array_header = &unpack_container_header<0x90, 0xdc>;
-static const execute_fn read_map_header = &unpack_container_header<0x80, 0xde>;
-
-#undef NEXT_CS
+#define unpack_container_header read_array_header
+#define fixed_offset 0x90
+#define var_offset 0xdc
+#include "unpack_container_header.h"
+#undef unpack_container_header
+#undef fixed_offset
+#undef var_offset
+
+#define unpack_container_header read_map_header
+#define fixed_offset 0x80
+#define var_offset 0xde
+#include "unpack_container_header.h"
+#undef unpack_container_header
+#undef fixed_offset
+#undef var_offset
/* vim: set ts=4 sw=4 sts=4 expandtab */
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..b1628322
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,55 @@
+[build-system]
+# 75.3.0 is the latest version supporting Python 3.8
+requires = ["setuptools >= 75.3.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "msgpack"
+dynamic = ["version"]
+# `license = "Apache-2.0"` is preferred. But keep old syntax for Python 3.8 compatibility.
+# https://github.com/msgpack/msgpack-python/pull/637
+license = {text="Apache 2.0"}
+authors = [{name="Inada Naoki", email="songofacandy@gmail.com"}]
+description = "MessagePack serializer"
+readme = "README.md"
+keywords = ["msgpack", "messagepack", "serializer", "serialization", "binary"]
+requires-python = ">=3.8"
+classifiers = [
+ "Development Status :: 5 - Production/Stable",
+ "Operating System :: OS Independent",
+ "Topic :: File Formats",
+ "Intended Audience :: Developers",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Programming Language :: Python :: 3.13",
+ "Programming Language :: Python :: 3.14",
+ "Programming Language :: Python :: Implementation :: CPython",
+ "Programming Language :: Python :: Implementation :: PyPy",
+]
+
+[project.urls]
+Homepage = "https://msgpack.org/"
+Documentation = "https://msgpack-python.readthedocs.io/"
+Repository = "https://github.com/msgpack/msgpack-python/"
+Tracker = "https://github.com/msgpack/msgpack-python/issues"
+Changelog = "https://github.com/msgpack/msgpack-python/blob/main/ChangeLog.rst"
+
+[tool.setuptools]
+# Do not install C/C++/Cython source files
+include-package-data = false
+
+[tool.setuptools.dynamic]
+version = {attr = "msgpack.__version__"}
+
+[tool.ruff]
+line-length = 100
+target-version = "py38"
+lint.select = [
+ "E", # pycodestyle
+ "F", # Pyflakes
+ "I", # isort
+ #"UP", pyupgrade
+]
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 00000000..78a2f38f
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+Cython~=3.1.1
diff --git a/setup.py b/setup.py
index 1055a61c..4029e9ed 100644
--- a/setup.py
+++ b/setup.py
@@ -1,122 +1,32 @@
#!/usr/bin/env python
-# coding: utf-8
import os
import sys
-from glob import glob
-from distutils.command.sdist import sdist
-from setuptools import setup, Extension
-from distutils.command.build_ext import build_ext
+from setuptools import Extension, setup
-class NoCython(Exception):
- pass
-
-try:
- import Cython.Compiler.Main as cython_compiler
- have_cython = True
-except ImportError:
- have_cython = False
-
-
-def cythonize(src):
- sys.stderr.write("cythonize: %r\n" % (src,))
- cython_compiler.compile([src], cplus=True, emit_linenums=True)
-
-def ensure_source(src):
- pyx = os.path.splitext(src)[0] + '.pyx'
-
- if not os.path.exists(src):
- if not have_cython:
- raise NoCython
- cythonize(pyx)
- elif (os.path.exists(pyx) and
- os.stat(src).st_mtime < os.stat(pyx).st_mtime and
- have_cython):
- cythonize(pyx)
- return src
-
-
-class BuildExt(build_ext):
- def build_extension(self, ext):
- try:
- ext.sources = list(map(ensure_source, ext.sources))
- except NoCython:
- print("WARNING")
- print("Cython is required for building extension from checkout.")
- print("Install Cython >= 0.16 or install msgpack from PyPI.")
- print("Falling back to pure Python implementation.")
- return
- try:
- return build_ext.build_extension(self, ext)
- except Exception as e:
- print("WARNING: Failed to compile extensiom modules.")
- print("msgpack uses fallback pure python implementation.")
- print(e)
-
-
-exec(open('msgpack/_version.py').read())
-
-version_str = '.'.join(str(x) for x in version[:3])
-if len(version) > 3 and version[3] != 'final':
- version_str += version[3]
-
-# take care of extension modules.
-if have_cython:
- class Sdist(sdist):
- def __init__(self, *args, **kwargs):
- for src in glob('msgpack/*.pyx'):
- cythonize(src)
- sdist.__init__(self, *args, **kwargs)
-else:
- Sdist = sdist
+PYPY = hasattr(sys, "pypy_version_info")
libraries = []
-if sys.platform == 'win32':
- libraries.append('ws2_32')
-
-if sys.byteorder == 'big':
- macros = [('__BIG_ENDIAN__', '1')]
-else:
- macros = [('__LITTLE_ENDIAN__', '1')]
-
+macros = []
ext_modules = []
-if not hasattr(sys, 'pypy_version_info'):
- ext_modules.append(Extension('msgpack._packer',
- sources=['msgpack/_packer.cpp'],
- libraries=libraries,
- include_dirs=['.'],
- define_macros=macros,
- ))
- ext_modules.append(Extension('msgpack._unpacker',
- sources=['msgpack/_unpacker.cpp'],
- libraries=libraries,
- include_dirs=['.'],
- define_macros=macros,
- ))
-del libraries, macros
+if sys.platform == "win32":
+ libraries.append("ws2_32")
+ macros = [("__LITTLE_ENDIAN__", "1")]
+
+if not PYPY and not os.environ.get("MSGPACK_PUREPYTHON"):
+ ext_modules.append(
+ Extension(
+ "msgpack._cmsgpack",
+ sources=["msgpack/_cmsgpack.c"],
+ libraries=libraries,
+ include_dirs=["."],
+ define_macros=macros,
+ )
+ )
+del libraries, macros
-desc = 'MessagePack (de)serializer.'
-f = open('README.rst')
-long_desc = f.read()
-f.close()
-del f
-
-setup(name='msgpack-python',
- author='INADA Naoki',
- author_email='songofacandy@gmail.com',
- version=version_str,
- cmdclass={'build_ext': BuildExt, 'sdist': Sdist},
- ext_modules=ext_modules,
- packages=['msgpack'],
- description=desc,
- long_description=long_desc,
- url='http://msgpack.org/',
- download_url='http://pypi.python.org/pypi/msgpack/',
- classifiers=[
- 'Programming Language :: Python :: 2',
- 'Programming Language :: Python :: 3',
- 'Intended Audience :: Developers',
- 'License :: OSI Approved :: Apache Software License',
- ]
- )
+setup(
+ ext_modules=ext_modules,
+ packages=["msgpack"],
+)
diff --git a/test/test_buffer.py b/test/test_buffer.py
index 04cc02d9..2c5a14c5 100644
--- a/test/test_buffer.py
+++ b/test/test_buffer.py
@@ -1,13 +1,49 @@
-#!/usr/bin/env python
-# coding: utf-8
+from pytest import raises
-from msgpack import packb, unpackb
+from msgpack import Packer, packb, unpackb
def test_unpack_buffer():
from array import array
- buf = array('b')
- buf.fromstring(packb(('foo', 'bar')))
+
+ buf = array("b")
+ buf.frombytes(packb((b"foo", b"bar")))
obj = unpackb(buf, use_list=1)
- assert [b'foo', b'bar'] == obj
+ assert [b"foo", b"bar"] == obj
+
+
+def test_unpack_bytearray():
+ buf = bytearray(packb((b"foo", b"bar")))
+ obj = unpackb(buf, use_list=1)
+ assert [b"foo", b"bar"] == obj
+ expected_type = bytes
+ assert all(type(s) == expected_type for s in obj)
+
+
+def test_unpack_memoryview():
+ buf = bytearray(packb((b"foo", b"bar")))
+ view = memoryview(buf)
+ obj = unpackb(view, use_list=1)
+ assert [b"foo", b"bar"] == obj
+ expected_type = bytes
+ assert all(type(s) == expected_type for s in obj)
+
+
+def test_packer_getbuffer():
+ packer = Packer(autoreset=False)
+ packer.pack_array_header(2)
+ packer.pack(42)
+ packer.pack("hello")
+ buffer = packer.getbuffer()
+ assert isinstance(buffer, memoryview)
+ assert bytes(buffer) == b"\x92*\xa5hello"
+
+ if Packer.__module__ == "msgpack._cmsgpack": # only for Cython
+ # cython Packer supports buffer protocol directly
+ assert bytes(packer) == b"\x92*\xa5hello"
+ with raises(BufferError):
+ packer.pack(42)
+ buffer.release()
+ packer.pack(42)
+ assert bytes(packer) == b"\x92*\xa5hello*"
diff --git a/test/test_case.py b/test/test_case.py
index 5a4bb6c4..c4c615e3 100644
--- a/test/test_case.py
+++ b/test/test_case.py
@@ -1,102 +1,136 @@
#!/usr/bin/env python
-# coding: utf-8
-
from msgpack import packb, unpackb
-def check(length, obj):
- v = packb(obj)
- assert len(v) == length, \
- "%r length should be %r but get %r" % (obj, length, len(v))
- assert unpackb(v, use_list=0) == obj
+def check(length, obj, use_bin_type=True):
+ v = packb(obj, use_bin_type=use_bin_type)
+ assert len(v) == length, f"{obj!r} length should be {length!r} but get {len(v)!r}"
+ assert unpackb(v, use_list=0, raw=not use_bin_type) == obj
+
def test_1():
- for o in [None, True, False, 0, 1, (1 << 6), (1 << 7) - 1, -1,
- -((1<<5)-1), -(1<<5)]:
+ for o in [
+ None,
+ True,
+ False,
+ 0,
+ 1,
+ (1 << 6),
+ (1 << 7) - 1,
+ -1,
+ -((1 << 5) - 1),
+ -(1 << 5),
+ ]:
check(1, o)
+
def test_2():
- for o in [1 << 7, (1 << 8) - 1,
- -((1<<5)+1), -(1<<7)
- ]:
+ for o in [1 << 7, (1 << 8) - 1, -((1 << 5) + 1), -(1 << 7)]:
check(2, o)
+
def test_3():
- for o in [1 << 8, (1 << 16) - 1,
- -((1<<7)+1), -(1<<15)]:
+ for o in [1 << 8, (1 << 16) - 1, -((1 << 7) + 1), -(1 << 15)]:
check(3, o)
+
def test_5():
- for o in [1 << 16, (1 << 32) - 1,
- -((1<<15)+1), -(1<<31)]:
+ for o in [1 << 16, (1 << 32) - 1, -((1 << 15) + 1), -(1 << 31)]:
check(5, o)
+
def test_9():
- for o in [1 << 32, (1 << 64) - 1,
- -((1<<31)+1), -(1<<63),
- 1.0, 0.1, -0.1, -1.0]:
+ for o in [
+ 1 << 32,
+ (1 << 64) - 1,
+ -((1 << 31) + 1),
+ -(1 << 63),
+ 1.0,
+ 0.1,
+ -0.1,
+ -1.0,
+ ]:
check(9, o)
def check_raw(overhead, num):
- check(num + overhead, b" " * num)
+ check(num + overhead, b" " * num, use_bin_type=False)
+
def test_fixraw():
check_raw(1, 0)
- check_raw(1, (1<<5) - 1)
+ check_raw(1, (1 << 5) - 1)
+
def test_raw16():
- check_raw(3, 1<<5)
- check_raw(3, (1<<16) - 1)
+ check_raw(3, 1 << 5)
+ check_raw(3, (1 << 16) - 1)
+
def test_raw32():
- check_raw(5, 1<<16)
+ check_raw(5, 1 << 16)
def check_array(overhead, num):
check(num + overhead, (None,) * num)
+
def test_fixarray():
check_array(1, 0)
check_array(1, (1 << 4) - 1)
+
def test_array16():
check_array(3, 1 << 4)
- check_array(3, (1<<16)-1)
+ check_array(3, (1 << 16) - 1)
+
def test_array32():
- check_array(5, (1<<16))
+ check_array(5, (1 << 16))
def match(obj, buf):
assert packb(obj) == buf
- assert unpackb(buf, use_list=0) == obj
+ assert unpackb(buf, use_list=0, strict_map_key=False) == obj
+
def test_match():
cases = [
- (None, b'\xc0'),
- (False, b'\xc2'),
- (True, b'\xc3'),
- (0, b'\x00'),
- (127, b'\x7f'),
- (128, b'\xcc\x80'),
- (256, b'\xcd\x01\x00'),
- (-1, b'\xff'),
- (-33, b'\xd0\xdf'),
- (-129, b'\xd1\xff\x7f'),
- ({1:1}, b'\x81\x01\x01'),
+ (None, b"\xc0"),
+ (False, b"\xc2"),
+ (True, b"\xc3"),
+ (0, b"\x00"),
+ (127, b"\x7f"),
+ (128, b"\xcc\x80"),
+ (256, b"\xcd\x01\x00"),
+ (-1, b"\xff"),
+ (-33, b"\xd0\xdf"),
+ (-129, b"\xd1\xff\x7f"),
+ ({1: 1}, b"\x81\x01\x01"),
(1.0, b"\xcb\x3f\xf0\x00\x00\x00\x00\x00\x00"),
- ((), b'\x90'),
- (tuple(range(15)),b"\x9f\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e"),
- (tuple(range(16)),b"\xdc\x00\x10\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"),
- ({}, b'\x80'),
- (dict([(x,x) for x in range(15)]), b'\x8f\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e'),
- (dict([(x,x) for x in range(16)]), b'\xde\x00\x10\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e\x0f\x0f'),
- ]
+ ((), b"\x90"),
+ (
+ tuple(range(15)),
+ b"\x9f\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e",
+ ),
+ (
+ tuple(range(16)),
+ b"\xdc\x00\x10\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+ ),
+ ({}, b"\x80"),
+ (
+ {x: x for x in range(15)},
+ b"\x8f\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e",
+ ),
+ (
+ {x: x for x in range(16)},
+ b"\xde\x00\x10\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e\x0f\x0f",
+ ),
+ ]
for v, p in cases:
match(v, p)
-def test_unicode():
- assert unpackb(packb('foobar'), use_list=1) == b'foobar'
+def test_unicode():
+ assert unpackb(packb("foobar"), use_list=1) == "foobar"
diff --git a/test/test_except.py b/test/test_except.py
index 361d4ea3..b77ac800 100644
--- a/test/test_except.py
+++ b/test/test_except.py
@@ -1,10 +1,10 @@
#!/usr/bin/env python
-# coding: utf-8
+
+import datetime
from pytest import raises
-from msgpack import packb, unpackb
-import datetime
+from msgpack import FormatError, OutOfData, StackError, Unpacker, packb, unpackb
class DummyException(Exception):
@@ -19,13 +19,45 @@ def test_raise_on_find_unsupported_value():
def test_raise_from_object_hook():
def hook(obj):
raise DummyException
+
raises(DummyException, unpackb, packb({}), object_hook=hook)
- raises(DummyException, unpackb, packb({'fizz': 'buzz'}), object_hook=hook)
- raises(DummyException, unpackb, packb({'fizz': 'buzz'}), object_pairs_hook=hook)
- raises(DummyException, unpackb, packb({'fizz': {'buzz': 'spam'}}), object_hook=hook)
- raises(DummyException, unpackb, packb({'fizz': {'buzz': 'spam'}}), object_pairs_hook=hook)
+ raises(DummyException, unpackb, packb({"fizz": "buzz"}), object_hook=hook)
+ raises(DummyException, unpackb, packb({"fizz": "buzz"}), object_pairs_hook=hook)
+ raises(DummyException, unpackb, packb({"fizz": {"buzz": "spam"}}), object_hook=hook)
+ raises(
+ DummyException,
+ unpackb,
+ packb({"fizz": {"buzz": "spam"}}),
+ object_pairs_hook=hook,
+ )
def test_invalidvalue():
+ incomplete = b"\xd9\x97#DL_" # raw8 - length=0x97
+ with raises(ValueError):
+ unpackb(incomplete)
+
+ with raises(OutOfData):
+ unpacker = Unpacker()
+ unpacker.feed(incomplete)
+ unpacker.unpack()
+
+ with raises(FormatError):
+ unpackb(b"\xc1") # (undefined tag)
+
+ with raises(FormatError):
+ unpackb(b"\x91\xc1") # fixarray(len=1) [ (undefined tag) ]
+
+ with raises(StackError):
+ unpackb(b"\x91" * 3000) # nested fixarray(len=1)
+
+
+def test_strict_map_key():
+ valid = {"unicode": 1, b"bytes": 2}
+ packed = packb(valid, use_bin_type=True)
+ assert valid == unpackb(packed, raw=False, strict_map_key=True)
+
+ invalid = {42: 1}
+ packed = packb(invalid, use_bin_type=True)
with raises(ValueError):
- unpackb(b'\xd9\x97#DL_')
+ unpackb(packed, raw=False, strict_map_key=True)
diff --git a/test/test_extension.py b/test/test_extension.py
new file mode 100644
index 00000000..aaf0fd92
--- /dev/null
+++ b/test/test_extension.py
@@ -0,0 +1,78 @@
+import array
+
+import msgpack
+from msgpack import ExtType
+
+
+def test_pack_ext_type():
+ def p(s):
+ packer = msgpack.Packer()
+ packer.pack_ext_type(0x42, s)
+ return packer.bytes()
+
+ assert p(b"A") == b"\xd4\x42A" # fixext 1
+ assert p(b"AB") == b"\xd5\x42AB" # fixext 2
+ assert p(b"ABCD") == b"\xd6\x42ABCD" # fixext 4
+ assert p(b"ABCDEFGH") == b"\xd7\x42ABCDEFGH" # fixext 8
+ assert p(b"A" * 16) == b"\xd8\x42" + b"A" * 16 # fixext 16
+ assert p(b"ABC") == b"\xc7\x03\x42ABC" # ext 8
+ assert p(b"A" * 0x0123) == b"\xc8\x01\x23\x42" + b"A" * 0x0123 # ext 16
+ assert p(b"A" * 0x00012345) == b"\xc9\x00\x01\x23\x45\x42" + b"A" * 0x00012345 # ext 32
+
+
+def test_unpack_ext_type():
+ def check(b, expected):
+ assert msgpack.unpackb(b) == expected
+
+ check(b"\xd4\x42A", ExtType(0x42, b"A")) # fixext 1
+ check(b"\xd5\x42AB", ExtType(0x42, b"AB")) # fixext 2
+ check(b"\xd6\x42ABCD", ExtType(0x42, b"ABCD")) # fixext 4
+ check(b"\xd7\x42ABCDEFGH", ExtType(0x42, b"ABCDEFGH")) # fixext 8
+ check(b"\xd8\x42" + b"A" * 16, ExtType(0x42, b"A" * 16)) # fixext 16
+ check(b"\xc7\x03\x42ABC", ExtType(0x42, b"ABC")) # ext 8
+ check(b"\xc8\x01\x23\x42" + b"A" * 0x0123, ExtType(0x42, b"A" * 0x0123)) # ext 16
+ check(
+ b"\xc9\x00\x01\x23\x45\x42" + b"A" * 0x00012345,
+ ExtType(0x42, b"A" * 0x00012345),
+ ) # ext 32
+
+
+def test_extension_type():
+ def default(obj):
+ print("default called", obj)
+ if isinstance(obj, array.array):
+ typecode = 123 # application specific typecode
+ try:
+ data = obj.tobytes()
+ except AttributeError:
+ data = obj.tostring()
+ return ExtType(typecode, data)
+ raise TypeError(f"Unknown type object {obj!r}")
+
+ def ext_hook(code, data):
+ print("ext_hook called", code, data)
+ assert code == 123
+ obj = array.array("d")
+ obj.frombytes(data)
+ return obj
+
+ obj = [42, b"hello", array.array("d", [1.1, 2.2, 3.3])]
+ s = msgpack.packb(obj, default=default)
+ obj2 = msgpack.unpackb(s, ext_hook=ext_hook)
+ assert obj == obj2
+
+
+def test_overriding_hooks():
+ def default(obj):
+ if isinstance(obj, int):
+ return {"__type__": "long", "__data__": str(obj)}
+ else:
+ return obj
+
+ obj = {"testval": 1823746192837461928374619}
+ refobj = {"testval": default(obj["testval"])}
+ refout = msgpack.packb(refobj)
+ assert isinstance(refout, (str, bytes))
+ testout = msgpack.packb(obj, default=default)
+
+ assert refout == testout
diff --git a/test/test_format.py b/test/test_format.py
index 5fec0c3a..c06c87dc 100644
--- a/test/test_format.py
+++ b/test/test_format.py
@@ -1,70 +1,88 @@
#!/usr/bin/env python
-# coding: utf-8
from msgpack import unpackb
-def check(src, should, use_list=0):
- assert unpackb(src, use_list=use_list) == should
+
+def check(src, should, use_list=0, raw=True):
+ assert unpackb(src, use_list=use_list, raw=raw, strict_map_key=False) == should
+
def testSimpleValue():
- check(b"\x93\xc0\xc2\xc3",
- (None, False, True,))
+ check(b"\x93\xc0\xc2\xc3", (None, False, True))
+
def testFixnum():
- check(b"\x92\x93\x00\x40\x7f\x93\xe0\xf0\xff",
- ((0,64,127,), (-32,-16,-1,),)
- )
+ check(b"\x92\x93\x00\x40\x7f\x93\xe0\xf0\xff", ((0, 64, 127), (-32, -16, -1)))
+
def testFixArray():
- check(b"\x92\x90\x91\x91\xc0",
- ((),((None,),),),
- )
+ check(b"\x92\x90\x91\x91\xc0", ((), ((None,),)))
+
def testFixRaw():
- check(b"\x94\xa0\xa1a\xa2bc\xa3def",
- (b"", b"a", b"bc", b"def",),
- )
+ check(b"\x94\xa0\xa1a\xa2bc\xa3def", (b"", b"a", b"bc", b"def"))
+
def testFixMap():
- check(
- b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80",
- {False: {None: None}, True:{None:{}}},
- )
+ check(b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", {False: {None: None}, True: {None: {}}})
+
def testUnsignedInt():
check(
- b"\x99\xcc\x00\xcc\x80\xcc\xff\xcd\x00\x00\xcd\x80\x00"
- b"\xcd\xff\xff\xce\x00\x00\x00\x00\xce\x80\x00\x00\x00"
- b"\xce\xff\xff\xff\xff",
- (0, 128, 255, 0, 32768, 65535, 0, 2147483648, 4294967295,),
- )
+ b"\x99\xcc\x00\xcc\x80\xcc\xff\xcd\x00\x00\xcd\x80\x00"
+ b"\xcd\xff\xff\xce\x00\x00\x00\x00\xce\x80\x00\x00\x00"
+ b"\xce\xff\xff\xff\xff",
+ (0, 128, 255, 0, 32768, 65535, 0, 2147483648, 4294967295),
+ )
+
def testSignedInt():
- check(b"\x99\xd0\x00\xd0\x80\xd0\xff\xd1\x00\x00\xd1\x80\x00"
- b"\xd1\xff\xff\xd2\x00\x00\x00\x00\xd2\x80\x00\x00\x00"
- b"\xd2\xff\xff\xff\xff",
- (0, -128, -1, 0, -32768, -1, 0, -2147483648, -1,))
+ check(
+ b"\x99\xd0\x00\xd0\x80\xd0\xff\xd1\x00\x00\xd1\x80\x00"
+ b"\xd1\xff\xff\xd2\x00\x00\x00\x00\xd2\x80\x00\x00\x00"
+ b"\xd2\xff\xff\xff\xff",
+ (0, -128, -1, 0, -32768, -1, 0, -2147483648, -1),
+ )
+
def testRaw():
- check(b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00"
+ check(
+ b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00"
+ b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab",
+ (b"", b"a", b"ab", b"", b"a", b"ab"),
+ )
+ check(
+ b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00"
b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab",
- (b"", b"a", b"ab", b"", b"a", b"ab"))
+ ("", "a", "ab", "", "a", "ab"),
+ raw=False,
+ )
+
def testArray():
- check(b"\x96\xdc\x00\x00\xdc\x00\x01\xc0\xdc\x00\x02\xc2\xc3\xdd\x00"
+ check(
+ b"\x96\xdc\x00\x00\xdc\x00\x01\xc0\xdc\x00\x02\xc2\xc3\xdd\x00"
b"\x00\x00\x00\xdd\x00\x00\x00\x01\xc0\xdd\x00\x00\x00\x02"
b"\xc2\xc3",
- ((), (None,), (False,True), (), (None,), (False,True))
- )
+ ((), (None,), (False, True), (), (None,), (False, True)),
+ )
+
def testMap():
check(
b"\x96"
- b"\xde\x00\x00"
- b"\xde\x00\x01\xc0\xc2"
- b"\xde\x00\x02\xc0\xc2\xc3\xc2"
- b"\xdf\x00\x00\x00\x00"
- b"\xdf\x00\x00\x00\x01\xc0\xc2"
- b"\xdf\x00\x00\x00\x02\xc0\xc2\xc3\xc2",
- ({}, {None: False}, {True: False, None: False}, {},
- {None: False}, {True: False, None: False}))
+ b"\xde\x00\x00"
+ b"\xde\x00\x01\xc0\xc2"
+ b"\xde\x00\x02\xc0\xc2\xc3\xc2"
+ b"\xdf\x00\x00\x00\x00"
+ b"\xdf\x00\x00\x00\x01\xc0\xc2"
+ b"\xdf\x00\x00\x00\x02\xc0\xc2\xc3\xc2",
+ (
+ {},
+ {None: False},
+ {True: False, None: False},
+ {},
+ {None: False},
+ {True: False, None: False},
+ ),
+ )
diff --git a/test/test_limits.py b/test/test_limits.py
new file mode 100644
index 00000000..9b92b4d9
--- /dev/null
+++ b/test/test_limits.py
@@ -0,0 +1,165 @@
+#!/usr/bin/env python
+import pytest
+
+from msgpack import (
+ ExtType,
+ Packer,
+ PackOverflowError,
+ PackValueError,
+ Unpacker,
+ UnpackValueError,
+ packb,
+ unpackb,
+)
+
+
+def test_integer():
+ x = -(2**63)
+ assert unpackb(packb(x)) == x
+ with pytest.raises(PackOverflowError):
+ packb(x - 1)
+
+ x = 2**64 - 1
+ assert unpackb(packb(x)) == x
+ with pytest.raises(PackOverflowError):
+ packb(x + 1)
+
+
+def test_array_header():
+ packer = Packer()
+ packer.pack_array_header(2**32 - 1)
+ with pytest.raises(PackValueError):
+ packer.pack_array_header(2**32)
+
+
+def test_map_header():
+ packer = Packer()
+ packer.pack_map_header(2**32 - 1)
+ with pytest.raises(PackValueError):
+ packer.pack_map_header(2**32)
+
+
+def test_max_str_len():
+ d = "x" * 3
+ packed = packb(d)
+
+ unpacker = Unpacker(max_str_len=3, raw=False)
+ unpacker.feed(packed)
+ assert unpacker.unpack() == d
+
+ unpacker = Unpacker(max_str_len=2, raw=False)
+ with pytest.raises(UnpackValueError):
+ unpacker.feed(packed)
+ unpacker.unpack()
+
+
+def test_max_bin_len():
+ d = b"x" * 3
+ packed = packb(d, use_bin_type=True)
+
+ unpacker = Unpacker(max_bin_len=3)
+ unpacker.feed(packed)
+ assert unpacker.unpack() == d
+
+ unpacker = Unpacker(max_bin_len=2)
+ with pytest.raises(UnpackValueError):
+ unpacker.feed(packed)
+ unpacker.unpack()
+
+
+def test_max_array_len():
+ d = [1, 2, 3]
+ packed = packb(d)
+
+ unpacker = Unpacker(max_array_len=3)
+ unpacker.feed(packed)
+ assert unpacker.unpack() == d
+
+ unpacker = Unpacker(max_array_len=2)
+ with pytest.raises(UnpackValueError):
+ unpacker.feed(packed)
+ unpacker.unpack()
+
+
+def test_max_map_len():
+ d = {1: 2, 3: 4, 5: 6}
+ packed = packb(d)
+
+ unpacker = Unpacker(max_map_len=3, strict_map_key=False)
+ unpacker.feed(packed)
+ assert unpacker.unpack() == d
+
+ unpacker = Unpacker(max_map_len=2, strict_map_key=False)
+ with pytest.raises(UnpackValueError):
+ unpacker.feed(packed)
+ unpacker.unpack()
+
+
+def test_max_ext_len():
+ d = ExtType(42, b"abc")
+ packed = packb(d)
+
+ unpacker = Unpacker(max_ext_len=3)
+ unpacker.feed(packed)
+ assert unpacker.unpack() == d
+
+ unpacker = Unpacker(max_ext_len=2)
+ with pytest.raises(UnpackValueError):
+ unpacker.feed(packed)
+ unpacker.unpack()
+
+
+# PyPy fails following tests because of constant folding?
+# https://bugs.pypy.org/issue1721
+# @pytest.mark.skipif(True, reason="Requires very large memory.")
+# def test_binary():
+# x = b'x' * (2**32 - 1)
+# assert unpackb(packb(x)) == x
+# del x
+# x = b'x' * (2**32)
+# with pytest.raises(ValueError):
+# packb(x)
+#
+#
+# @pytest.mark.skipif(True, reason="Requires very large memory.")
+# def test_string():
+# x = 'x' * (2**32 - 1)
+# assert unpackb(packb(x)) == x
+# x += 'y'
+# with pytest.raises(ValueError):
+# packb(x)
+#
+#
+# @pytest.mark.skipif(True, reason="Requires very large memory.")
+# def test_array():
+# x = [0] * (2**32 - 1)
+# assert unpackb(packb(x)) == x
+# x.append(0)
+# with pytest.raises(ValueError):
+# packb(x)
+
+
+# auto max len
+
+
+def test_auto_max_array_len():
+ packed = b"\xdc\x00\x06zz"
+ with pytest.raises(UnpackValueError):
+ unpackb(packed, raw=False)
+
+ unpacker = Unpacker(max_buffer_size=5, raw=False)
+ unpacker.feed(packed)
+ with pytest.raises(UnpackValueError):
+ unpacker.unpack()
+
+
+def test_auto_max_map_len():
+ # len(packed) == 6 -> max_map_len == 3
+ packed = b"\xde\x00\x04zzz"
+ with pytest.raises(UnpackValueError):
+ unpackb(packed, raw=False)
+
+ unpacker = Unpacker(max_buffer_size=6, raw=False)
+ unpacker.feed(packed)
+ with pytest.raises(UnpackValueError):
+ unpacker.unpack()
diff --git a/test/test_memoryview.py b/test/test_memoryview.py
new file mode 100644
index 00000000..0a2a6f53
--- /dev/null
+++ b/test/test_memoryview.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+
+from array import array
+
+from msgpack import packb, unpackb
+
+
+def make_array(f, data):
+ a = array(f)
+ a.frombytes(data)
+ return a
+
+
+def _runtest(format, nbytes, expected_header, expected_prefix, use_bin_type):
+ # create a new array
+ original_array = array(format)
+ original_array.fromlist([255] * (nbytes // original_array.itemsize))
+ original_data = original_array.tobytes()
+ view = memoryview(original_array)
+
+ # pack, unpack, and reconstruct array
+ packed = packb(view, use_bin_type=use_bin_type)
+ unpacked = unpackb(packed, raw=(not use_bin_type))
+ reconstructed_array = make_array(format, unpacked)
+
+ # check that we got the right amount of data
+ assert len(original_data) == nbytes
+ # check packed header
+ assert packed[:1] == expected_header
+ # check packed length prefix, if any
+ assert packed[1 : 1 + len(expected_prefix)] == expected_prefix
+ # check packed data
+ assert packed[1 + len(expected_prefix) :] == original_data
+ # check array unpacked correctly
+ assert original_array == reconstructed_array
+
+
+def test_fixstr_from_byte():
+ _runtest("B", 1, b"\xa1", b"", False)
+ _runtest("B", 31, b"\xbf", b"", False)
+
+
+def test_fixstr_from_float():
+ _runtest("f", 4, b"\xa4", b"", False)
+ _runtest("f", 28, b"\xbc", b"", False)
+
+
+def test_str16_from_byte():
+ _runtest("B", 2**8, b"\xda", b"\x01\x00", False)
+ _runtest("B", 2**16 - 1, b"\xda", b"\xff\xff", False)
+
+
+def test_str16_from_float():
+ _runtest("f", 2**8, b"\xda", b"\x01\x00", False)
+ _runtest("f", 2**16 - 4, b"\xda", b"\xff\xfc", False)
+
+
+def test_str32_from_byte():
+ _runtest("B", 2**16, b"\xdb", b"\x00\x01\x00\x00", False)
+
+
+def test_str32_from_float():
+ _runtest("f", 2**16, b"\xdb", b"\x00\x01\x00\x00", False)
+
+
+def test_bin8_from_byte():
+ _runtest("B", 1, b"\xc4", b"\x01", True)
+ _runtest("B", 2**8 - 1, b"\xc4", b"\xff", True)
+
+
+def test_bin8_from_float():
+ _runtest("f", 4, b"\xc4", b"\x04", True)
+ _runtest("f", 2**8 - 4, b"\xc4", b"\xfc", True)
+
+
+def test_bin16_from_byte():
+ _runtest("B", 2**8, b"\xc5", b"\x01\x00", True)
+ _runtest("B", 2**16 - 1, b"\xc5", b"\xff\xff", True)
+
+
+def test_bin16_from_float():
+ _runtest("f", 2**8, b"\xc5", b"\x01\x00", True)
+ _runtest("f", 2**16 - 4, b"\xc5", b"\xff\xfc", True)
+
+
+def test_bin32_from_byte():
+ _runtest("B", 2**16, b"\xc6", b"\x00\x01\x00\x00", True)
+
+
+def test_bin32_from_float():
+ _runtest("f", 2**16, b"\xc6", b"\x00\x01\x00\x00", True)
+
+
+def test_multidim_memoryview():
+ # See https://github.com/msgpack/msgpack-python/issues/526
+ view = memoryview(b"\00" * 6)
+ data = view.cast(view.format, (3, 2))
+ packed = packb(data)
+ assert packed == b"\xc4\x06\x00\x00\x00\x00\x00\x00"
diff --git a/test/test_newspec.py b/test/test_newspec.py
new file mode 100644
index 00000000..9e2f9be5
--- /dev/null
+++ b/test/test_newspec.py
@@ -0,0 +1,90 @@
+from msgpack import ExtType, packb, unpackb
+
+
+def test_str8():
+ header = b"\xd9"
+ data = b"x" * 32
+ b = packb(data.decode(), use_bin_type=True)
+ assert len(b) == len(data) + 2
+ assert b[0:2] == header + b"\x20"
+ assert b[2:] == data
+ assert unpackb(b, raw=True) == data
+ assert unpackb(b, raw=False) == data.decode()
+
+ data = b"x" * 255
+ b = packb(data.decode(), use_bin_type=True)
+ assert len(b) == len(data) + 2
+ assert b[0:2] == header + b"\xff"
+ assert b[2:] == data
+ assert unpackb(b, raw=True) == data
+ assert unpackb(b, raw=False) == data.decode()
+
+
+def test_bin8():
+ header = b"\xc4"
+ data = b""
+ b = packb(data, use_bin_type=True)
+ assert len(b) == len(data) + 2
+ assert b[0:2] == header + b"\x00"
+ assert b[2:] == data
+ assert unpackb(b) == data
+
+ data = b"x" * 255
+ b = packb(data, use_bin_type=True)
+ assert len(b) == len(data) + 2
+ assert b[0:2] == header + b"\xff"
+ assert b[2:] == data
+ assert unpackb(b) == data
+
+
+def test_bin16():
+ header = b"\xc5"
+ data = b"x" * 256
+ b = packb(data, use_bin_type=True)
+ assert len(b) == len(data) + 3
+ assert b[0:1] == header
+ assert b[1:3] == b"\x01\x00"
+ assert b[3:] == data
+ assert unpackb(b) == data
+
+ data = b"x" * 65535
+ b = packb(data, use_bin_type=True)
+ assert len(b) == len(data) + 3
+ assert b[0:1] == header
+ assert b[1:3] == b"\xff\xff"
+ assert b[3:] == data
+ assert unpackb(b) == data
+
+
+def test_bin32():
+ header = b"\xc6"
+ data = b"x" * 65536
+ b = packb(data, use_bin_type=True)
+ assert len(b) == len(data) + 5
+ assert b[0:1] == header
+ assert b[1:5] == b"\x00\x01\x00\x00"
+ assert b[5:] == data
+ assert unpackb(b) == data
+
+
+def test_ext():
+ def check(ext, packed):
+ assert packb(ext) == packed
+ assert unpackb(packed) == ext
+
+ check(ExtType(0x42, b"Z"), b"\xd4\x42Z") # fixext 1
+ check(ExtType(0x42, b"ZZ"), b"\xd5\x42ZZ") # fixext 2
+ check(ExtType(0x42, b"Z" * 4), b"\xd6\x42" + b"Z" * 4) # fixext 4
+ check(ExtType(0x42, b"Z" * 8), b"\xd7\x42" + b"Z" * 8) # fixext 8
+ check(ExtType(0x42, b"Z" * 16), b"\xd8\x42" + b"Z" * 16) # fixext 16
+ # ext 8
+ check(ExtType(0x42, b""), b"\xc7\x00\x42")
+ check(ExtType(0x42, b"Z" * 255), b"\xc7\xff\x42" + b"Z" * 255)
+ # ext 16
+ check(ExtType(0x42, b"Z" * 256), b"\xc8\x01\x00\x42" + b"Z" * 256)
+ check(ExtType(0x42, b"Z" * 0xFFFF), b"\xc8\xff\xff\x42" + b"Z" * 0xFFFF)
+ # ext 32
+ check(ExtType(0x42, b"Z" * 0x10000), b"\xc9\x00\x01\x00\x00\x42" + b"Z" * 0x10000)
+ # needs large memory
+ # check(ExtType(0x42, b'Z'*0xffffffff),
+ # b'\xc9\xff\xff\xff\xff\x42' + b'Z'*0xffffffff)
diff --git a/test/test_obj.py b/test/test_obj.py
index fbf610c6..23be06d5 100644
--- a/test/test_obj.py
+++ b/test/test_obj.py
@@ -1,67 +1,82 @@
#!/usr/bin/env python
-# coding: utf-8
from pytest import raises
+
from msgpack import packb, unpackb
+
def _decode_complex(obj):
- if b'__complex__' in obj:
- return complex(obj[b'real'], obj[b'imag'])
+ if b"__complex__" in obj:
+ return complex(obj[b"real"], obj[b"imag"])
return obj
+
def _encode_complex(obj):
if isinstance(obj, complex):
- return {b'__complex__': True, b'real': 1, b'imag': 2}
+ return {b"__complex__": True, b"real": 1, b"imag": 2}
return obj
+
def test_encode_hook():
- packed = packb([3, 1+2j], default=_encode_complex)
+ packed = packb([3, 1 + 2j], default=_encode_complex)
unpacked = unpackb(packed, use_list=1)
- assert unpacked[1] == {b'__complex__': True, b'real': 1, b'imag': 2}
+ assert unpacked[1] == {b"__complex__": True, b"real": 1, b"imag": 2}
+
def test_decode_hook():
- packed = packb([3, {b'__complex__': True, b'real': 1, b'imag': 2}])
+ packed = packb([3, {b"__complex__": True, b"real": 1, b"imag": 2}])
unpacked = unpackb(packed, object_hook=_decode_complex, use_list=1)
- assert unpacked[1] == 1+2j
+ assert unpacked[1] == 1 + 2j
+
def test_decode_pairs_hook():
packed = packb([3, {1: 2, 3: 4}])
prod_sum = 1 * 2 + 3 * 4
- unpacked = unpackb(packed, object_pairs_hook=lambda l: sum(k * v for k, v in l), use_list=1)
+ unpacked = unpackb(
+ packed,
+ object_pairs_hook=lambda lst: sum(k * v for k, v in lst),
+ use_list=1,
+ strict_map_key=False,
+ )
assert unpacked[1] == prod_sum
+
def test_only_one_obj_hook():
- with raises(ValueError):
- unpackb(b'', object_hook=lambda x: x, object_pairs_hook=lambda x: x)
+ with raises(TypeError):
+ unpackb(b"", object_hook=lambda x: x, object_pairs_hook=lambda x: x)
+
def test_bad_hook():
- with raises(ValueError):
- packed = packb([3, 1+2j], default=lambda o: o)
- unpacked = unpackb(packed, use_list=1)
+ with raises(TypeError):
+ packed = packb([3, 1 + 2j], default=lambda o: o)
+ unpackb(packed, use_list=1)
+
def _arr_to_str(arr):
- return ''.join(str(c) for c in arr)
+ return "".join(str(c) for c in arr)
+
def test_array_hook():
- packed = packb([1,2,3])
+ packed = packb([1, 2, 3])
unpacked = unpackb(packed, list_hook=_arr_to_str, use_list=1)
- assert unpacked == '123'
+ assert unpacked == "123"
class DecodeError(Exception):
pass
+
def bad_complex_decoder(o):
raise DecodeError("Ooops!")
def test_an_exception_in_objecthook1():
with raises(DecodeError):
- packed = packb({1: {'__complex__': True, 'real': 1, 'imag': 2}})
- unpackb(packed, object_hook=bad_complex_decoder)
+ packed = packb({1: {"__complex__": True, "real": 1, "imag": 2}})
+ unpackb(packed, object_hook=bad_complex_decoder, strict_map_key=False)
def test_an_exception_in_objecthook2():
with raises(DecodeError):
- packed = packb({1: [{'__complex__': True, 'real': 1, 'imag': 2}]})
- unpackb(packed, list_hook=bad_complex_decoder, use_list=1)
+ packed = packb({1: [{"__complex__": True, "real": 1, "imag": 2}]})
+ unpackb(packed, list_hook=bad_complex_decoder, use_list=1, strict_map_key=False)
diff --git a/test/test_pack.py b/test/test_pack.py
index 3225f41c..374d1549 100644
--- a/test/test_pack.py
+++ b/test/test_pack.py
@@ -1,95 +1,113 @@
#!/usr/bin/env python
-# coding: utf-8
-import six
import struct
-from pytest import raises, xfail
+from collections import OrderedDict
+from io import BytesIO
-from msgpack import packb, unpackb, Unpacker, Packer
+import pytest
+
+from msgpack import Packer, Unpacker, packb, unpackb
-from io import BytesIO
def check(data, use_list=False):
- re = unpackb(packb(data), use_list=use_list)
+ re = unpackb(packb(data), use_list=use_list, strict_map_key=False)
assert re == data
+
def testPack():
test_data = [
- 0, 1, 127, 128, 255, 256, 65535, 65536,
- -1, -32, -33, -128, -129, -32768, -32769,
- 1.0,
- b"", b"a", b"a"*31, b"a"*32,
- None, True, False,
- (), ((),), ((), None,),
+ 0,
+ 1,
+ 127,
+ 128,
+ 255,
+ 256,
+ 65535,
+ 65536,
+ 4294967295,
+ 4294967296,
+ -1,
+ -32,
+ -33,
+ -128,
+ -129,
+ -32768,
+ -32769,
+ -4294967296,
+ -4294967297,
+ 1.0,
+ b"",
+ b"a",
+ b"a" * 31,
+ b"a" * 32,
+ None,
+ True,
+ False,
+ (),
+ ((),),
+ ((), None),
{None: 0},
- (1<<23),
- ]
+ (1 << 23),
+ ]
for td in test_data:
check(td)
+
def testPackUnicode():
- test_data = [
- six.u(""), six.u("abcd"), [six.u("defgh")], six.u("Русский текст"),
- ]
+ test_data = ["", "abcd", ["defgh"], "Русский текст"]
for td in test_data:
- re = unpackb(packb(td, encoding='utf-8'), use_list=1, encoding='utf-8')
+ re = unpackb(packb(td), use_list=1, raw=False)
assert re == td
- packer = Packer(encoding='utf-8')
+ packer = Packer()
data = packer.pack(td)
- re = Unpacker(BytesIO(data), encoding='utf-8', use_list=1).unpack()
+ re = Unpacker(BytesIO(data), raw=False, use_list=1).unpack()
assert re == td
-def testPackUTF32():
- try:
- test_data = [
- six.u(""),
- six.u("abcd"),
- [six.u("defgh")],
- six.u("Русский текст"),
- ]
- for td in test_data:
- re = unpackb(packb(td, encoding='utf-32'), use_list=1, encoding='utf-32')
- assert re == td
- except LookupError as e:
- xfail(e)
def testPackBytes():
- test_data = [
- b"", b"abcd", (b"defgh",),
- ]
+ test_data = [b"", b"abcd", (b"defgh",)]
for td in test_data:
check(td)
+
+def testPackByteArrays():
+ test_data = [bytearray(b""), bytearray(b"abcd"), (bytearray(b"defgh"),)]
+ for td in test_data:
+ check(td)
+
+
def testIgnoreUnicodeErrors():
- re = unpackb(packb(b'abc\xeddef'), encoding='utf-8', unicode_errors='ignore', use_list=1)
+ re = unpackb(packb(b"abc\xeddef", use_bin_type=False), raw=False, unicode_errors="ignore")
assert re == "abcdef"
+
def testStrictUnicodeUnpack():
- with raises(UnicodeDecodeError):
- unpackb(packb(b'abc\xeddef'), encoding='utf-8', use_list=1)
+ packed = packb(b"abc\xeddef", use_bin_type=False)
+ with pytest.raises(UnicodeDecodeError):
+ unpackb(packed, raw=False, use_list=1)
-def testStrictUnicodePack():
- with raises(UnicodeEncodeError):
- packb(six.u("abc\xeddef"), encoding='ascii', unicode_errors='strict')
def testIgnoreErrorsPack():
- re = unpackb(packb(six.u("abcФФФdef"), encoding='ascii', unicode_errors='ignore'), encoding='utf-8', use_list=1)
- assert re == six.u("abcdef")
+ re = unpackb(
+ packb("abc\udc80\udcffdef", use_bin_type=True, unicode_errors="ignore"),
+ raw=False,
+ use_list=1,
+ )
+ assert re == "abcdef"
-def testNoEncoding():
- with raises(TypeError):
- packb(six.u("abc"), encoding=None)
def testDecodeBinary():
- re = unpackb(packb("abc"), encoding=None, use_list=1)
+ re = unpackb(packb(b"abc"), use_list=1)
assert re == b"abc"
+
def testPackFloat():
- assert packb(1.0, use_single_float=True) == b'\xca' + struct.pack('>f', 1.0)
- assert packb(1.0, use_single_float=False) == b'\xcb' + struct.pack('>d', 1.0)
+ assert packb(1.0, use_single_float=True) == b"\xca" + struct.pack(">f", 1.0)
+ assert packb(1.0, use_single_float=False) == b"\xcb" + struct.pack(">d", 1.0)
+
def testArraySize(sizes=[0, 5, 50, 1000]):
- bio = six.BytesIO()
+ bio = BytesIO()
packer = Packer()
for size in sizes:
bio.write(packer.pack_array_header(size))
@@ -101,6 +119,7 @@ def testArraySize(sizes=[0, 5, 50, 1000]):
for size in sizes:
assert unpacker.unpack() == list(range(size))
+
def test_manualreset(sizes=[0, 5, 50, 1000]):
packer = Packer(autoreset=False)
for size in sizes:
@@ -108,56 +127,55 @@ def test_manualreset(sizes=[0, 5, 50, 1000]):
for i in range(size):
packer.pack(i)
- bio = six.BytesIO(packer.bytes())
+ bio = BytesIO(packer.bytes())
unpacker = Unpacker(bio, use_list=1)
for size in sizes:
assert unpacker.unpack() == list(range(size))
packer.reset()
- assert packer.bytes() == b''
+ assert packer.bytes() == b""
+
def testMapSize(sizes=[0, 5, 50, 1000]):
- bio = six.BytesIO()
+ bio = BytesIO()
packer = Packer()
for size in sizes:
bio.write(packer.pack_map_header(size))
for i in range(size):
- bio.write(packer.pack(i)) # key
- bio.write(packer.pack(i * 2)) # value
+ bio.write(packer.pack(i)) # key
+ bio.write(packer.pack(i * 2)) # value
bio.seek(0)
- unpacker = Unpacker(bio)
+ unpacker = Unpacker(bio, strict_map_key=False)
for size in sizes:
- assert unpacker.unpack() == dict((i, i * 2) for i in range(size))
-
-
-class odict(dict):
- '''Reimplement OrderedDict to run test on Python 2.6'''
- def __init__(self, seq):
- self._seq = seq
- dict.__init__(self, seq)
-
- def items(self):
- return self._seq[:]
+ assert unpacker.unpack() == {i: i * 2 for i in range(size)}
- def iteritems(self):
- return iter(self._seq)
-
- def keys(self):
- return [x[0] for x in self._seq]
def test_odict():
- seq = [(b'one', 1), (b'two', 2), (b'three', 3), (b'four', 4)]
- od = odict(seq)
+ seq = [(b"one", 1), (b"two", 2), (b"three", 3), (b"four", 4)]
+ od = OrderedDict(seq)
assert unpackb(packb(od), use_list=1) == dict(seq)
+
def pair_hook(seq):
return list(seq)
+
assert unpackb(packb(od), object_pairs_hook=pair_hook, use_list=1) == seq
def test_pairlist():
- pairlist = [(b'a', 1), (2, b'b'), (b'foo', b'bar')]
+ pairlist = [(b"a", 1), (2, b"b"), (b"foo", b"bar")]
packer = Packer()
packed = packer.pack_map_pairs(pairlist)
- unpacked = unpackb(packed, object_pairs_hook=list)
+ unpacked = unpackb(packed, object_pairs_hook=list, strict_map_key=False)
assert pairlist == unpacked
+
+
+def test_get_buffer():
+ packer = Packer(autoreset=0, use_bin_type=True)
+ packer.pack([1, 2])
+ strm = BytesIO()
+ strm.write(packer.getbuffer())
+ written = strm.getvalue()
+
+ expected = packb([1, 2], use_bin_type=True)
+ assert written == expected
diff --git a/test/test_read_size.py b/test/test_read_size.py
index 4e6c2b93..0f6c1b50 100644
--- a/test/test_read_size.py
+++ b/test/test_read_size.py
@@ -1,66 +1,72 @@
"""Test Unpacker's read_array_header and read_map_header methods"""
-from msgpack import packb, Unpacker, OutOfData
+
+from msgpack import OutOfData, Unpacker, packb
+
UnexpectedTypeException = ValueError
+
def test_read_array_header():
unpacker = Unpacker()
- unpacker.feed(packb(['a', 'b', 'c']))
+ unpacker.feed(packb(["a", "b", "c"]))
assert unpacker.read_array_header() == 3
- assert unpacker.unpack() == b'a'
- assert unpacker.unpack() == b'b'
- assert unpacker.unpack() == b'c'
+ assert unpacker.unpack() == "a"
+ assert unpacker.unpack() == "b"
+ assert unpacker.unpack() == "c"
try:
unpacker.unpack()
- assert 0, 'should raise exception'
+ assert 0, "should raise exception"
except OutOfData:
- assert 1, 'okay'
+ assert 1, "okay"
def test_read_map_header():
unpacker = Unpacker()
- unpacker.feed(packb({'a': 'A'}))
+ unpacker.feed(packb({"a": "A"}))
assert unpacker.read_map_header() == 1
- assert unpacker.unpack() == B'a'
- assert unpacker.unpack() == B'A'
+ assert unpacker.unpack() == "a"
+ assert unpacker.unpack() == "A"
try:
unpacker.unpack()
- assert 0, 'should raise exception'
+ assert 0, "should raise exception"
except OutOfData:
- assert 1, 'okay'
+ assert 1, "okay"
+
def test_incorrect_type_array():
unpacker = Unpacker()
unpacker.feed(packb(1))
try:
unpacker.read_array_header()
- assert 0, 'should raise exception'
+ assert 0, "should raise exception"
except UnexpectedTypeException:
- assert 1, 'okay'
+ assert 1, "okay"
+
def test_incorrect_type_map():
unpacker = Unpacker()
unpacker.feed(packb(1))
try:
unpacker.read_map_header()
- assert 0, 'should raise exception'
+ assert 0, "should raise exception"
except UnexpectedTypeException:
- assert 1, 'okay'
+ assert 1, "okay"
+
def test_correct_type_nested_array():
unpacker = Unpacker()
- unpacker.feed(packb({'a': ['b', 'c', 'd']}))
+ unpacker.feed(packb({"a": ["b", "c", "d"]}))
try:
unpacker.read_array_header()
- assert 0, 'should raise exception'
+ assert 0, "should raise exception"
except UnexpectedTypeException:
- assert 1, 'okay'
+ assert 1, "okay"
+
def test_incorrect_type_nested_map():
unpacker = Unpacker()
- unpacker.feed(packb([{'a': 'b'}]))
+ unpacker.feed(packb([{"a": "b"}]))
try:
unpacker.read_map_header()
- assert 0, 'should raise exception'
+ assert 0, "should raise exception"
except UnexpectedTypeException:
- assert 1, 'okay'
-
+ assert 1, "okay"
diff --git a/test/test_seq.py b/test/test_seq.py
index af719b05..8dee4620 100644
--- a/test/test_seq.py
+++ b/test/test_seq.py
@@ -1,16 +1,15 @@
-#!/usr/bin/env python
-# coding: utf-8
-
-import six
+# ruff: noqa: E501
+# ignore line length limit for long comments
import io
+
import msgpack
-binarydata = [chr(i) for i in range(256)]
-binarydata = six.b("".join(binarydata))
+binarydata = bytes(bytearray(range(256)))
+
def gen_binary_data(idx):
- data = binarydata[:idx % 300]
- return data
+ return binarydata[: idx % 300]
+
def test_exceeding_unpacker_read_size():
dumpf = io.BytesIO()
@@ -19,10 +18,10 @@ def test_exceeding_unpacker_read_size():
NUMBER_OF_STRINGS = 6
read_size = 16
- # 5 ok for read_size=16, while 6 glibc detected *** python: double free or corruption (fasttop):
- # 20 ok for read_size=256, while 25 segfaults / glibc detected *** python: double free or corruption (!prev)
- # 40 ok for read_size=1024, while 50 introduces errors
- # 7000 ok for read_size=1024*1024, while 8000 leads to glibc detected *** python: double free or corruption (!prev):
+ # 5 ok for read_size=16, while 6 glibc detected *** python: double free or corruption (fasttop):
+ # 20 ok for read_size=256, while 25 segfaults / glibc detected *** python: double free or corruption (!prev)
+ # 40 ok for read_size=1024, while 50 introduces errors
+ # 7000 ok for read_size=1024*1024, while 8000 leads to glibc detected *** python: double free or corruption (!prev):
for idx in range(NUMBER_OF_STRINGS):
data = gen_binary_data(idx)
@@ -35,7 +34,7 @@ def test_exceeding_unpacker_read_size():
read_count = 0
for idx, o in enumerate(unpacker):
- assert type(o) == bytes
+ assert isinstance(o, bytes)
assert o == gen_binary_data(idx)
read_count += 1
diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py
index 9db14ca7..0f895d7d 100644
--- a/test/test_sequnpack.py
+++ b/test/test_sequnpack.py
@@ -1,87 +1,148 @@
#!/usr/bin/env python
-# coding: utf-8
+import io
-import six
-from msgpack import Unpacker, BufferFull
-from msgpack.exceptions import OutOfData
from pytest import raises
+from msgpack import BufferFull, Unpacker, pack, packb
+from msgpack.exceptions import OutOfData
+
def test_partialdata():
unpacker = Unpacker()
- unpacker.feed(b'\xa5')
- with raises(StopIteration): next(iter(unpacker))
- unpacker.feed(b'h')
- with raises(StopIteration): next(iter(unpacker))
- unpacker.feed(b'a')
- with raises(StopIteration): next(iter(unpacker))
- unpacker.feed(b'l')
- with raises(StopIteration): next(iter(unpacker))
- unpacker.feed(b'l')
- with raises(StopIteration): next(iter(unpacker))
- unpacker.feed(b'o')
- assert next(iter(unpacker)) == b'hallo'
+ unpacker.feed(b"\xa5")
+ with raises(StopIteration):
+ next(iter(unpacker))
+ unpacker.feed(b"h")
+ with raises(StopIteration):
+ next(iter(unpacker))
+ unpacker.feed(b"a")
+ with raises(StopIteration):
+ next(iter(unpacker))
+ unpacker.feed(b"l")
+ with raises(StopIteration):
+ next(iter(unpacker))
+ unpacker.feed(b"l")
+ with raises(StopIteration):
+ next(iter(unpacker))
+ unpacker.feed(b"o")
+ assert next(iter(unpacker)) == "hallo"
+
def test_foobar():
unpacker = Unpacker(read_size=3, use_list=1)
- unpacker.feed(b'foobar')
- assert unpacker.unpack() == ord(b'f')
- assert unpacker.unpack() == ord(b'o')
- assert unpacker.unpack() == ord(b'o')
- assert unpacker.unpack() == ord(b'b')
- assert unpacker.unpack() == ord(b'a')
- assert unpacker.unpack() == ord(b'r')
+ unpacker.feed(b"foobar")
+ assert unpacker.unpack() == ord(b"f")
+ assert unpacker.unpack() == ord(b"o")
+ assert unpacker.unpack() == ord(b"o")
+ assert unpacker.unpack() == ord(b"b")
+ assert unpacker.unpack() == ord(b"a")
+ assert unpacker.unpack() == ord(b"r")
with raises(OutOfData):
unpacker.unpack()
- unpacker.feed(b'foo')
- unpacker.feed(b'bar')
+ unpacker.feed(b"foo")
+ unpacker.feed(b"bar")
k = 0
- for o, e in zip(unpacker, 'foobarbaz'):
+ for o, e in zip(unpacker, "foobarbaz"):
assert o == ord(e)
k += 1
- assert k == len(b'foobar')
+ assert k == len(b"foobar")
+
def test_foobar_skip():
unpacker = Unpacker(read_size=3, use_list=1)
- unpacker.feed(b'foobar')
- assert unpacker.unpack() == ord(b'f')
+ unpacker.feed(b"foobar")
+ assert unpacker.unpack() == ord(b"f")
unpacker.skip()
- assert unpacker.unpack() == ord(b'o')
+ assert unpacker.unpack() == ord(b"o")
unpacker.skip()
- assert unpacker.unpack() == ord(b'a')
+ assert unpacker.unpack() == ord(b"a")
unpacker.skip()
with raises(OutOfData):
unpacker.unpack()
+
def test_maxbuffersize():
with raises(ValueError):
Unpacker(read_size=5, max_buffer_size=3)
unpacker = Unpacker(read_size=3, max_buffer_size=3, use_list=1)
- unpacker.feed(b'fo')
+ unpacker.feed(b"fo")
+ with raises(BufferFull):
+ unpacker.feed(b"ob")
+ unpacker.feed(b"o")
+ assert ord("f") == next(unpacker)
+ unpacker.feed(b"b")
+ assert ord("o") == next(unpacker)
+ assert ord("o") == next(unpacker)
+ assert ord("b") == next(unpacker)
+
+
+def test_maxbuffersize_file():
+ buff = io.BytesIO(packb(b"a" * 10) + packb([b"a" * 20] * 2))
+ unpacker = Unpacker(buff, read_size=1, max_buffer_size=19, max_bin_len=20)
+ assert unpacker.unpack() == b"a" * 10
+ # assert unpacker.unpack() == [b"a" * 20]*2
with raises(BufferFull):
- unpacker.feed(b'ob')
- unpacker.feed(b'o')
- assert ord('f') == next(unpacker)
- unpacker.feed(b'b')
- assert ord('o') == next(unpacker)
- assert ord('o') == next(unpacker)
- assert ord('b') == next(unpacker)
+ print(unpacker.unpack())
def test_readbytes():
unpacker = Unpacker(read_size=3)
- unpacker.feed(b'foobar')
- assert unpacker.unpack() == ord(b'f')
- assert unpacker.read_bytes(3) == b'oob'
- assert unpacker.unpack() == ord(b'a')
- assert unpacker.unpack() == ord(b'r')
+ unpacker.feed(b"foobar")
+ assert unpacker.unpack() == ord(b"f")
+ assert unpacker.read_bytes(3) == b"oob"
+ assert unpacker.unpack() == ord(b"a")
+ assert unpacker.unpack() == ord(b"r")
# Test buffer refill
- unpacker = Unpacker(six.BytesIO(b'foobar'), read_size=3)
- assert unpacker.unpack() == ord(b'f')
- assert unpacker.read_bytes(3) == b'oob'
- assert unpacker.unpack() == ord(b'a')
- assert unpacker.unpack() == ord(b'r')
+ unpacker = Unpacker(io.BytesIO(b"foobar"), read_size=3)
+ assert unpacker.unpack() == ord(b"f")
+ assert unpacker.read_bytes(3) == b"oob"
+ assert unpacker.unpack() == ord(b"a")
+ assert unpacker.unpack() == ord(b"r")
+
+ # Issue 352
+ u = Unpacker()
+ u.feed(b"x")
+ assert bytes(u.read_bytes(1)) == b"x"
+ with raises(StopIteration):
+ next(u)
+ u.feed(b"\1")
+ assert next(u) == 1
+
+
+def test_issue124():
+ unpacker = Unpacker()
+ unpacker.feed(b"\xa1?\xa1!")
+ assert tuple(unpacker) == ("?", "!")
+ assert tuple(unpacker) == ()
+ unpacker.feed(b"\xa1?\xa1")
+ assert tuple(unpacker) == ("?",)
+ assert tuple(unpacker) == ()
+ unpacker.feed(b"!")
+ assert tuple(unpacker) == ("!",)
+ assert tuple(unpacker) == ()
+
+def test_unpack_tell():
+ stream = io.BytesIO()
+ messages = [2**i - 1 for i in range(65)]
+ messages += [-(2**i) for i in range(1, 64)]
+ messages += [
+ b"hello",
+ b"hello" * 1000,
+ list(range(20)),
+ {i: bytes(i) * i for i in range(10)},
+ {i: bytes(i) * i for i in range(32)},
+ ]
+ offsets = []
+ for m in messages:
+ pack(m, stream)
+ offsets.append(stream.tell())
+ stream.seek(0)
+ unpacker = Unpacker(stream, strict_map_key=False)
+ for m, o in zip(messages, offsets):
+ m2 = next(unpacker)
+ assert m == m2
+ assert o == unpacker.tell()
diff --git a/test/test_stricttype.py b/test/test_stricttype.py
new file mode 100644
index 00000000..72776a2c
--- /dev/null
+++ b/test/test_stricttype.py
@@ -0,0 +1,59 @@
+from collections import namedtuple
+
+from msgpack import ExtType, packb, unpackb
+
+
+def test_namedtuple():
+ T = namedtuple("T", "foo bar")
+
+ def default(o):
+ if isinstance(o, T):
+ return dict(o._asdict())
+ raise TypeError(f"Unsupported type {type(o)}")
+
+ packed = packb(T(1, 42), strict_types=True, use_bin_type=True, default=default)
+ unpacked = unpackb(packed, raw=False)
+ assert unpacked == {"foo": 1, "bar": 42}
+
+
+def test_tuple():
+ t = ("one", 2, b"three", (4,))
+
+ def default(o):
+ if isinstance(o, tuple):
+ return {"__type__": "tuple", "value": list(o)}
+ raise TypeError(f"Unsupported type {type(o)}")
+
+ def convert(o):
+ if o.get("__type__") == "tuple":
+ return tuple(o["value"])
+ return o
+
+ data = packb(t, strict_types=True, use_bin_type=True, default=default)
+ expected = unpackb(data, raw=False, object_hook=convert)
+
+ assert expected == t
+
+
+def test_tuple_ext():
+ t = ("one", 2, b"three", (4,))
+
+ MSGPACK_EXT_TYPE_TUPLE = 0
+
+ def default(o):
+ if isinstance(o, tuple):
+ # Convert to list and pack
+ payload = packb(list(o), strict_types=True, use_bin_type=True, default=default)
+ return ExtType(MSGPACK_EXT_TYPE_TUPLE, payload)
+ raise TypeError(repr(o))
+
+ def convert(code, payload):
+ if code == MSGPACK_EXT_TYPE_TUPLE:
+ # Unpack and convert to tuple
+ return tuple(unpackb(payload, raw=False, ext_hook=convert))
+ raise ValueError(f"Unknown Ext code {code}")
+
+ data = packb(t, strict_types=True, use_bin_type=True, default=default)
+ expected = unpackb(data, raw=False, ext_hook=convert)
+
+ assert expected == t
diff --git a/test/test_subtype.py b/test/test_subtype.py
index 6807508e..a911578c 100644
--- a/test/test_subtype.py
+++ b/test/test_subtype.py
@@ -1,19 +1,24 @@
#!/usr/bin/env python
-# coding: utf-8
-from msgpack import packb, unpackb
from collections import namedtuple
+from msgpack import packb
+
+
class MyList(list):
pass
+
class MyDict(dict):
pass
+
class MyTuple(tuple):
pass
-MyNamedTuple = namedtuple('MyNamedTuple', 'x y')
+
+MyNamedTuple = namedtuple("MyNamedTuple", "x y")
+
def test_types():
assert packb(MyDict()) == packb(dict())
diff --git a/test/test_timestamp.py b/test/test_timestamp.py
new file mode 100644
index 00000000..831141a1
--- /dev/null
+++ b/test/test_timestamp.py
@@ -0,0 +1,171 @@
+import datetime
+
+import pytest
+
+import msgpack
+from msgpack.ext import Timestamp
+
+
+def test_timestamp():
+ # timestamp32
+ ts = Timestamp(2**32 - 1)
+ assert ts.to_bytes() == b"\xff\xff\xff\xff"
+ packed = msgpack.packb(ts)
+ assert packed == b"\xd6\xff" + ts.to_bytes()
+ unpacked = msgpack.unpackb(packed)
+ assert ts == unpacked
+ assert ts.seconds == 2**32 - 1 and ts.nanoseconds == 0
+
+ # timestamp64
+ ts = Timestamp(2**34 - 1, 999999999)
+ assert ts.to_bytes() == b"\xee\x6b\x27\xff\xff\xff\xff\xff"
+ packed = msgpack.packb(ts)
+ assert packed == b"\xd7\xff" + ts.to_bytes()
+ unpacked = msgpack.unpackb(packed)
+ assert ts == unpacked
+ assert ts.seconds == 2**34 - 1 and ts.nanoseconds == 999999999
+
+ # timestamp96
+ ts = Timestamp(2**63 - 1, 999999999)
+ assert ts.to_bytes() == b"\x3b\x9a\xc9\xff\x7f\xff\xff\xff\xff\xff\xff\xff"
+ packed = msgpack.packb(ts)
+ assert packed == b"\xc7\x0c\xff" + ts.to_bytes()
+ unpacked = msgpack.unpackb(packed)
+ assert ts == unpacked
+ assert ts.seconds == 2**63 - 1 and ts.nanoseconds == 999999999
+
+ # negative fractional
+ ts = Timestamp.from_unix(-2.3) # s: -3, ns: 700000000
+ assert ts.seconds == -3 and ts.nanoseconds == 700000000
+ assert ts.to_bytes() == b"\x29\xb9\x27\x00\xff\xff\xff\xff\xff\xff\xff\xfd"
+ packed = msgpack.packb(ts)
+ assert packed == b"\xc7\x0c\xff" + ts.to_bytes()
+ unpacked = msgpack.unpackb(packed)
+ assert ts == unpacked
+
+
+def test_unpack_timestamp():
+ # timestamp 32
+ assert msgpack.unpackb(b"\xd6\xff\x00\x00\x00\x00") == Timestamp(0)
+
+ # timestamp 64
+ assert msgpack.unpackb(b"\xd7\xff" + b"\x00" * 8) == Timestamp(0)
+ with pytest.raises(ValueError):
+ msgpack.unpackb(b"\xd7\xff" + b"\xff" * 8)
+
+ # timestamp 96
+ assert msgpack.unpackb(b"\xc7\x0c\xff" + b"\x00" * 12) == Timestamp(0)
+ with pytest.raises(ValueError):
+ msgpack.unpackb(b"\xc7\x0c\xff" + b"\xff" * 12)
+
+ # Undefined
+ with pytest.raises(ValueError):
+ msgpack.unpackb(b"\xd4\xff\x00") # fixext 1
+ with pytest.raises(ValueError):
+ msgpack.unpackb(b"\xd5\xff\x00\x00") # fixext 2
+ with pytest.raises(ValueError):
+ msgpack.unpackb(b"\xc7\x00\xff") # ext8 (len=0)
+ with pytest.raises(ValueError):
+ msgpack.unpackb(b"\xc7\x03\xff\0\0\0") # ext8 (len=3)
+ with pytest.raises(ValueError):
+ msgpack.unpackb(b"\xc7\x05\xff\0\0\0\0\0") # ext8 (len=5)
+
+
+def test_timestamp_from():
+ t = Timestamp(42, 14000)
+ assert Timestamp.from_unix(42.000014) == t
+ assert Timestamp.from_unix_nano(42000014000) == t
+
+
+def test_timestamp_to():
+ t = Timestamp(42, 14000)
+ assert t.to_unix() == 42.000014
+ assert t.to_unix_nano() == 42000014000
+
+
+def test_timestamp_datetime():
+ t = Timestamp(42, 14)
+ utc = datetime.timezone.utc
+ assert t.to_datetime() == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=utc)
+
+ ts = datetime.datetime(2024, 4, 16, 8, 43, 9, 420317, tzinfo=utc)
+ ts2 = datetime.datetime(2024, 4, 16, 8, 43, 9, 420318, tzinfo=utc)
+
+ assert (
+ Timestamp.from_datetime(ts2).nanoseconds - Timestamp.from_datetime(ts).nanoseconds == 1000
+ )
+
+ ts3 = datetime.datetime(2024, 4, 16, 8, 43, 9, 4256)
+ ts4 = datetime.datetime(2024, 4, 16, 8, 43, 9, 4257)
+ assert (
+ Timestamp.from_datetime(ts4).nanoseconds - Timestamp.from_datetime(ts3).nanoseconds == 1000
+ )
+
+ assert Timestamp.from_datetime(ts).to_datetime() == ts
+
+
+def test_unpack_datetime():
+ t = Timestamp(42, 14)
+ utc = datetime.timezone.utc
+ packed = msgpack.packb(t)
+ unpacked = msgpack.unpackb(packed, timestamp=3)
+ assert unpacked == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=utc)
+
+
+def test_pack_unpack_before_epoch():
+ utc = datetime.timezone.utc
+ t_in = datetime.datetime(1960, 1, 1, tzinfo=utc)
+ packed = msgpack.packb(t_in, datetime=True)
+ unpacked = msgpack.unpackb(packed, timestamp=3)
+ assert unpacked == t_in
+
+
+def test_pack_datetime():
+ t = Timestamp(42, 14000)
+ dt = t.to_datetime()
+ utc = datetime.timezone.utc
+ assert dt == datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=utc)
+
+ packed = msgpack.packb(dt, datetime=True)
+ packed2 = msgpack.packb(t)
+ assert packed == packed2
+
+ unpacked = msgpack.unpackb(packed)
+ print(packed, unpacked)
+ assert unpacked == t
+
+ unpacked = msgpack.unpackb(packed, timestamp=3)
+ assert unpacked == dt
+
+ x = []
+ packed = msgpack.packb(dt, datetime=False, default=x.append)
+ assert x
+ assert x[0] == dt
+ assert msgpack.unpackb(packed) is None
+
+
+def test_issue451():
+ # https://github.com/msgpack/msgpack-python/issues/451
+ utc = datetime.timezone.utc
+ dt = datetime.datetime(2100, 1, 1, 1, 1, tzinfo=utc)
+ packed = msgpack.packb(dt, datetime=True)
+ assert packed == b"\xd6\xff\xf4\x86eL"
+
+ unpacked = msgpack.unpackb(packed, timestamp=3)
+ assert dt == unpacked
+
+
+def test_pack_datetime_without_tzinfo():
+ dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14)
+ with pytest.raises(ValueError, match="where tzinfo=None"):
+ msgpack.packb(dt, datetime=True)
+
+ dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14)
+ packed = msgpack.packb(dt, datetime=True, default=lambda x: None)
+ assert packed == msgpack.packb(None)
+
+ utc = datetime.timezone.utc
+ dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=utc)
+ packed = msgpack.packb(dt, datetime=True)
+ unpacked = msgpack.unpackb(packed, timestamp=3)
+ assert unpacked == dt
diff --git a/test/test_unpack.py b/test/test_unpack.py
new file mode 100644
index 00000000..b17c3c53
--- /dev/null
+++ b/test/test_unpack.py
@@ -0,0 +1,89 @@
+import sys
+from io import BytesIO
+
+from pytest import mark, raises
+
+from msgpack import ExtType, OutOfData, Unpacker, packb
+
+
+def test_unpack_array_header_from_file():
+ f = BytesIO(packb([1, 2, 3, 4]))
+ unpacker = Unpacker(f)
+ assert unpacker.read_array_header() == 4
+ assert unpacker.unpack() == 1
+ assert unpacker.unpack() == 2
+ assert unpacker.unpack() == 3
+ assert unpacker.unpack() == 4
+ with raises(OutOfData):
+ unpacker.unpack()
+
+
+@mark.skipif(
+ "not hasattr(sys, 'getrefcount')",
+ reason="sys.getrefcount() is needed to pass this test",
+)
+def test_unpacker_hook_refcnt():
+ result = []
+
+ def hook(x):
+ result.append(x)
+ return x
+
+ basecnt = sys.getrefcount(hook)
+
+ up = Unpacker(object_hook=hook, list_hook=hook)
+
+ assert sys.getrefcount(hook) >= basecnt + 2
+
+ up.feed(packb([{}]))
+ up.feed(packb([{}]))
+ assert up.unpack() == [{}]
+ assert up.unpack() == [{}]
+ assert result == [{}, [{}], {}, [{}]]
+
+ del up
+
+ assert sys.getrefcount(hook) == basecnt
+
+
+def test_unpacker_ext_hook():
+ class MyUnpacker(Unpacker):
+ def __init__(self):
+ super().__init__(ext_hook=self._hook, raw=False)
+
+ def _hook(self, code, data):
+ if code == 1:
+ return int(data)
+ else:
+ return ExtType(code, data)
+
+ unpacker = MyUnpacker()
+ unpacker.feed(packb({"a": 1}))
+ assert unpacker.unpack() == {"a": 1}
+ unpacker.feed(packb({"a": ExtType(1, b"123")}))
+ assert unpacker.unpack() == {"a": 123}
+ unpacker.feed(packb({"a": ExtType(2, b"321")}))
+ assert unpacker.unpack() == {"a": ExtType(2, b"321")}
+
+
+def test_unpacker_tell():
+ objects = 1, 2, "abc", "def", "ghi"
+ packed = b"\x01\x02\xa3abc\xa3def\xa3ghi"
+ positions = 1, 2, 6, 10, 14
+ unpacker = Unpacker(BytesIO(packed))
+ for obj, unp, pos in zip(objects, unpacker, positions):
+ assert obj == unp
+ assert pos == unpacker.tell()
+
+
+def test_unpacker_tell_read_bytes():
+ objects = 1, "abc", "ghi"
+ packed = b"\x01\x02\xa3abc\xa3def\xa3ghi"
+ raw_data = b"\x02", b"\xa3def", b""
+ lengths = 1, 4, 999
+ positions = 1, 6, 14
+ unpacker = Unpacker(BytesIO(packed))
+ for obj, unp, pos, n, raw in zip(objects, unpacker, positions, lengths, raw_data):
+ assert obj == unp
+ assert pos == unpacker.tell()
+ assert unpacker.read_bytes(n) == raw
diff --git a/test/test_unpack_raw.py b/test/test_unpack_raw.py
deleted file mode 100644
index 9f3784c0..00000000
--- a/test/test_unpack_raw.py
+++ /dev/null
@@ -1,29 +0,0 @@
-"""Tests for cases where the user seeks to obtain packed msgpack objects"""
-
-import six
-from msgpack import Unpacker, packb
-
-
-def test_write_bytes():
- unpacker = Unpacker()
- unpacker.feed(b'abc')
- f = six.BytesIO()
- assert unpacker.unpack(f.write) == ord('a')
- assert f.getvalue() == b'a'
- f = six.BytesIO()
- assert unpacker.skip(f.write) is None
- assert f.getvalue() == b'b'
- f = six.BytesIO()
- assert unpacker.skip() is None
- assert f.getvalue() == b''
-
-
-def test_write_bytes_multi_buffer():
- long_val = (5) * 100
- expected = packb(long_val)
- unpacker = Unpacker(six.BytesIO(expected), read_size=3, max_buffer_size=3)
-
- f = six.BytesIO()
- unpacked = unpacker.unpack(f.write)
- assert unpacked == long_val
- assert f.getvalue() == expected
diff --git a/tox.ini b/tox.ini
deleted file mode 100644
index 19513524..00000000
--- a/tox.ini
+++ /dev/null
@@ -1,9 +0,0 @@
-[tox]
-envlist = py26,py27,py32,py33
-
-[testenv]
-deps=
- pytest
- six
-
-commands=py.test test
diff --git a/upload_windows.bat b/upload_windows.bat
deleted file mode 100644
index 5cd9a7c5..00000000
--- a/upload_windows.bat
+++ /dev/null
@@ -1,4 +0,0 @@
-c:\Python27\python setup.py bdist_egg bdist_wininst upload
-c:\Python33\python setup.py bdist_egg bdist_wininst upload
-c:\Python27_amd64\python setup.py bdist_egg bdist_wininst upload
-c:\Python33_amd64\python setup.py bdist_egg bdist_wininst upload