From 5a0152f7a0f5b93df4fdf996fa1b606e145e429b Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Mon, 5 Dec 2022 11:02:05 -0700 Subject: [PATCH 1/9] add missing error handling --- metadatadtype/metadatadtype/src/dtype.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/metadatadtype/metadatadtype/src/dtype.c b/metadatadtype/metadatadtype/src/dtype.c index b4976a3a..e9140562 100644 --- a/metadatadtype/metadatadtype/src/dtype.c +++ b/metadatadtype/metadatadtype/src/dtype.c @@ -28,6 +28,9 @@ get_value(PyObject *scalar) return -1; } double res = PyFloat_AsDouble(value); Py_DECREF(value); + if (res == -1 && PyErr_Occurred()) { + return -1; + } return res; } From ed95d72c7545188b71903a76641394e63f0b14df Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Mon, 5 Dec 2022 13:01:22 -0700 Subject: [PATCH 2/9] adding initial implementation of asciidtype; compiles but untested --- asciidtype/.clang-format | 37 +++ asciidtype/.flake8 | 2 + asciidtype/.gitignore | 3 + asciidtype/README.md | 27 +++ asciidtype/asciidtype/__init__.py | 10 + asciidtype/asciidtype/scalar.py | 10 + asciidtype/asciidtype/src/asciidtype_main.c | 57 +++++ asciidtype/asciidtype/src/casts.c | 11 + asciidtype/asciidtype/src/casts.h | 13 ++ asciidtype/asciidtype/src/dtype.c | 235 ++++++++++++++++++++ asciidtype/asciidtype/src/dtype.h | 30 +++ asciidtype/asciidtype/src/umath.c | 12 + asciidtype/asciidtype/src/umath.h | 4 + asciidtype/meson.build | 48 ++++ asciidtype/pyproject.toml | 20 ++ asciidtype/tests/conftest.py | 3 + asciidtype/tests/test_asciidtype.py | 11 + 17 files changed, 533 insertions(+) create mode 100644 asciidtype/.clang-format create mode 100644 asciidtype/.flake8 create mode 100644 asciidtype/.gitignore create mode 100644 asciidtype/README.md create mode 100644 asciidtype/asciidtype/__init__.py create mode 100644 asciidtype/asciidtype/scalar.py create mode 100644 asciidtype/asciidtype/src/asciidtype_main.c create mode 100644 asciidtype/asciidtype/src/casts.c create mode 100644 
asciidtype/asciidtype/src/casts.h create mode 100644 asciidtype/asciidtype/src/dtype.c create mode 100644 asciidtype/asciidtype/src/dtype.h create mode 100644 asciidtype/asciidtype/src/umath.c create mode 100644 asciidtype/asciidtype/src/umath.h create mode 100644 asciidtype/meson.build create mode 100644 asciidtype/pyproject.toml create mode 100644 asciidtype/tests/conftest.py create mode 100644 asciidtype/tests/test_asciidtype.py diff --git a/asciidtype/.clang-format b/asciidtype/.clang-format new file mode 100644 index 00000000..60b1066b --- /dev/null +++ b/asciidtype/.clang-format @@ -0,0 +1,37 @@ +# A clang-format style that approximates Python's PEP 7 +# Useful for IDE integration +# +# Based on Paul Ganssle's version at +# https://gist.github.com/pganssle/0e3a5f828b4d07d79447f6ced8e7e4db +# and modified for NumPy +BasedOnStyle: Google +AlignAfterOpenBracket: Align +AllowShortEnumsOnASingleLine: false +AllowShortIfStatementsOnASingleLine: false +AlwaysBreakAfterReturnType: TopLevel +BreakBeforeBraces: Stroustrup +ColumnLimit: 79 +ContinuationIndentWidth: 8 +DerivePointerAlignment: false +IndentWidth: 4 +IncludeBlocks: Regroup +IncludeCategories: + - Regex: '^[<"](Python|structmember|pymem)\.h' + Priority: -3 + CaseSensitive: true + - Regex: '^"numpy/' + Priority: -2 + - Regex: '^"(npy_pycompat|npy_config)' + Priority: -1 + - Regex: '^"[[:alnum:]_.]+"' + Priority: 1 + - Regex: '^<[[:alnum:]_.]+"' + Priority: 2 +Language: Cpp +PointerAlignment: Right +ReflowComments: true +SpaceBeforeParens: ControlStatements +SpacesInParentheses: false +StatementMacros: [PyObject_HEAD, PyObject_VAR_HEAD, PyObject_HEAD_EXTRA] +TabWidth: 4 +UseTab: Never diff --git a/asciidtype/.flake8 b/asciidtype/.flake8 new file mode 100644 index 00000000..80676bc7 --- /dev/null +++ b/asciidtype/.flake8 @@ -0,0 +1,2 @@ +[flake8] +per-file-ignores = __init__.py:F401 diff --git a/asciidtype/.gitignore b/asciidtype/.gitignore new file mode 100644 index 00000000..900a7ce9 --- /dev/null +++ 
b/asciidtype/.gitignore @@ -0,0 +1,3 @@ +dist/ +.mesonpy*.ini +__pycache__ diff --git a/asciidtype/README.md b/asciidtype/README.md new file mode 100644 index 00000000..6d37ec58 --- /dev/null +++ b/asciidtype/README.md @@ -0,0 +1,27 @@ +# A dtype that stores ASCII data + +This is a simple proof-of-concept dtype using the (as of late 2022) experimental +[new dtype +implementation](https://numpy.org/neps/nep-0041-improved-dtype-support.html) in +NumPy. + +## Building + +Ensure Meson and NumPy are installed in the python environment you would like to use: + +``` +$ python3 -m pip install meson meson-python numpy build patchelf +``` + +Build with meson, create a wheel, and install it + +``` +$ rm -r dist/ +$ meson build +$ python -m build --wheel -Cbuilddir=build +$ python -m pip install dist/asciidtype*.whl +``` + +The `mesonpy` build backend for pip [does not currently support editable +installs](https://github.com/mesonbuild/meson-python/issues/47), so `pip install +-e .` will not work. diff --git a/asciidtype/asciidtype/__init__.py b/asciidtype/asciidtype/__init__.py new file mode 100644 index 00000000..768d649d --- /dev/null +++ b/asciidtype/asciidtype/__init__.py @@ -0,0 +1,10 @@ +"""A dtype for working with ASCII data + +This is an example usage of the experimental new dtype API +in Numpy and is not intended for any real purpose. 
+""" + +from .scalar import ASCIIScalar # isort: skip +from ._asciidtype_main import ASCIIDType + +__all__ = ["ASCIIDType", "ASCIIScalar"] diff --git a/asciidtype/asciidtype/scalar.py b/asciidtype/asciidtype/scalar.py new file mode 100644 index 00000000..c28d0261 --- /dev/null +++ b/asciidtype/asciidtype/scalar.py @@ -0,0 +1,10 @@ +"""A scalar type needed by the dtype machinery.""" + + +class ASCIIScalar: + def __init__(self, value, dtype): + self.value = value + self.dtype = dtype + + def __repr__(self): + return f"{self.value}" diff --git a/asciidtype/asciidtype/src/asciidtype_main.c b/asciidtype/asciidtype/src/asciidtype_main.c new file mode 100644 index 00000000..efed3025 --- /dev/null +++ b/asciidtype/asciidtype/src/asciidtype_main.c @@ -0,0 +1,57 @@ +#include + +#define PY_ARRAY_UNIQUE_SYMBOL asciidtype_ARRAY_API +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#include "numpy/arrayobject.h" +#include "numpy/experimental_dtype_api.h" + +#include "dtype.h" + +static struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + .m_name = "asciidtype_main", + .m_size = -1, +}; + +/* Module initialization function */ +PyMODINIT_FUNC +PyInit__asciidtype_main(void) +{ + if (_import_array() < 0) { + return NULL; + } + if (import_experimental_dtype_api(5) < 0) { + return NULL; + } + + PyObject *m = PyModule_Create(&moduledef); + if (m == NULL) { + return NULL; + } + + PyObject *mod = PyImport_ImportModule("asciidtype"); + if (mod == NULL) { + goto error; + } + ASCIIScalar_Type = + (PyTypeObject *)PyObject_GetAttrString(mod, "ASCIIScalar"); + Py_DECREF(mod); + + if (ASCIIScalar_Type == NULL) { + goto error; + } + + if (init_ascii_dtype() < 0) { + goto error; + } + + if (PyModule_AddObject(m, "ASCIIDType", (PyObject *)&ASCIIDType) < 0) { + goto error; + } + + return m; + +error: + Py_DECREF(m); + return NULL; +} diff --git a/asciidtype/asciidtype/src/casts.c b/asciidtype/asciidtype/src/casts.c new file mode 100644 index 00000000..cec1fcc6 --- /dev/null +++ 
b/asciidtype/asciidtype/src/casts.c @@ -0,0 +1,11 @@ +#include + +#define PY_ARRAY_UNIQUE_SYMBOL asciidtype_ARRAY_API +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#define NO_IMPORT_ARRAY +#include "numpy/arrayobject.h" +#include "numpy/experimental_dtype_api.h" +#include "numpy/ndarraytypes.h" + +#include "casts.h" +#include "dtype.h" diff --git a/asciidtype/asciidtype/src/casts.h b/asciidtype/asciidtype/src/casts.h new file mode 100644 index 00000000..af8ca056 --- /dev/null +++ b/asciidtype/asciidtype/src/casts.h @@ -0,0 +1,13 @@ +#ifndef _NPY_CASTS_H +#define _NPY_CASTS_H + +#include + +#define PY_ARRAY_UNIQUE_SYMBOL asciidtype_ARRAY_API +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#define NO_IMPORT_ARRAY +#include "numpy/arrayobject.h" +#include "numpy/experimental_dtype_api.h" +#include "numpy/ndarraytypes.h" + +#endif /* _NPY_CASTS_H */ diff --git a/asciidtype/asciidtype/src/dtype.c b/asciidtype/asciidtype/src/dtype.c new file mode 100644 index 00000000..260d287d --- /dev/null +++ b/asciidtype/asciidtype/src/dtype.c @@ -0,0 +1,235 @@ +#include "dtype.h" + +#include "casts.h" + +PyTypeObject *ASCIIScalar_Type = NULL; + +static char * +get_value(PyObject *scalar) +{ + PyTypeObject *scalar_type = Py_TYPE(scalar); + if (scalar_type != ASCIIScalar_Type) { + PyErr_SetString(PyExc_TypeError, + "Can only store ASCIIScalar in a ASCIIDType array."); + return NULL; + } + + PyObject *value = PyObject_GetAttrString(scalar, "value"); + if (value == NULL) { + return NULL; + } + PyObject *res_bytes = PyUnicode_AsASCIIString(value); + Py_DECREF(value); + char *res = PyBytes_AsString(res_bytes); + if (res == NULL) { + return NULL; + } + return res; +} + +/* + * Internal helper to create new instances + */ +ASCIIDTypeObject * +new_asciidtype_instance(PyObject *size) +{ + ASCIIDTypeObject *new = (ASCIIDTypeObject *)PyArrayDescr_Type.tp_new( + (PyTypeObject *)&ASCIIDType, NULL, NULL); + if (new == NULL) { + return NULL; + } + long size_l = PyLong_AsLong(size); + if 
(size_l == -1 && PyErr_Occurred()) { + return NULL; + } + new->size = size_l; + new->base.elsize = size_l * sizeof(char); + new->base.alignment = size_l *_Alignof(char); + + return new; +} + +/* + * This is used to determine the correct dtype to return when operations mix + * dtypes (I think?). For now just return the first one. + */ +static ASCIIDTypeObject * +common_instance(ASCIIDTypeObject *dtype1, ASCIIDTypeObject *dtype2) +{ + if (!PyObject_RichCompareBool((PyObject *)dtype1, (PyObject *)dtype2, + Py_EQ)) { + PyErr_SetString( + PyExc_RuntimeError, + "common_instance called on unequal ASCIIDType instances"); + return NULL; + } + return dtype1; +} + +static PyArray_DTypeMeta * +common_dtype(PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other) +{ + // for now always raise an error here until we can figure out + // how to deal with strings here + + PyErr_SetString(PyExc_RuntimeError, "common_dtype called in ASCIIDType"); + return NULL; + + // Py_INCREF(Py_NotImplemented); + // return (PyArray_DTypeMeta *)Py_NotImplemented; +} + +static PyArray_Descr * +ascii_discover_descriptor_from_pyobject(PyArray_DTypeMeta *NPY_UNUSED(cls), + PyObject *obj) +{ + if (Py_TYPE(obj) != ASCIIScalar_Type) { + PyErr_SetString(PyExc_TypeError, + "Can only store ASCIIScalar in a ASCIIDType array."); + return NULL; + } + + PyArray_Descr *ret = (PyArray_Descr *)PyObject_GetAttrString(obj, "dtype"); + if (ret == NULL) { + return NULL; + } + return ret; +} + +static int +asciidtype_setitem(ASCIIDTypeObject *descr, PyObject *obj, char *dataptr) +{ + char *value = get_value(obj); + if (value == NULL) { + return -1; + } + + memcpy(dataptr, value, descr->size * sizeof(char)); // NOLINT + + return 0; +} + +static PyObject * +asciidtype_getitem(ASCIIDTypeObject *descr, char *dataptr) +{ + char *val = NULL; + /* get the value */ + memcpy(val, dataptr, descr->size * sizeof(char)); // NOLINT + + PyObject *val_obj = PyUnicode_FromStringAndSize(val, descr->size); + if (val_obj == NULL) { + return NULL; 
+ } + + PyObject *res = PyObject_CallFunctionObjArgs((PyObject *)ASCIIScalar_Type, + val_obj, descr, NULL); + Py_DECREF(val_obj); + if (res == NULL) { + return NULL; + } + + return res; +} + +static ASCIIDTypeObject * +asciidtype_ensure_canonical(ASCIIDTypeObject *self) +{ + Py_INCREF(self); + return self; +} + +static PyType_Slot ASCIIDType_Slots[] = { + {NPY_DT_common_instance, &common_instance}, + {NPY_DT_common_dtype, &common_dtype}, + {NPY_DT_discover_descr_from_pyobject, + &ascii_discover_descriptor_from_pyobject}, + /* The header is wrong on main :(, so we add 1 */ + {NPY_DT_setitem, &asciidtype_setitem}, + {NPY_DT_getitem, &asciidtype_getitem}, + {NPY_DT_ensure_canonical, &asciidtype_ensure_canonical}, + {0, NULL}}; + +static PyObject * +asciidtype_new(PyTypeObject *NPY_UNUSED(cls), PyObject *args, PyObject *kwds) +{ + static char *kwargs_strs[] = {"size", NULL}; + + PyObject *size = NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:ASCIIDType", kwargs_strs, + &size)) { + return NULL; + } + if (size == NULL) { + PyErr_SetString( + PyExc_TypeError, + "Must provide a size to instantiate an ASCIIDType instance"); + return NULL; + } + + return (PyObject *)new_asciidtype_instance(size); +} + +static void +asciidtype_dealloc(ASCIIDTypeObject *self) +{ + PyArrayDescr_Type.tp_dealloc((PyObject *)self); +} + +static PyObject * +asciidtype_repr(ASCIIDTypeObject *self) +{ + PyObject *res = PyUnicode_FromFormat("ASCIIDType(%ld)", self->size); + return res; +} + +static PyMemberDef ASCIIDType_members[] = { + {"size", T_LONG, offsetof(ASCIIDTypeObject, size), READONLY, + "The number of characters per array element"}, + {NULL}, +}; + +/* + * This is the basic things that you need to create a Python Type/Class in C. + * However, there is a slight difference here because we create a + * PyArray_DTypeMeta, which is a larger struct than a typical type. + * (This should get a bit nicer eventually with Python >3.11.) 
+ */ +PyArray_DTypeMeta ASCIIDType = { + {{ + PyVarObject_HEAD_INIT(NULL, 0).tp_name = + "asciidtype.ASCIIDType", + .tp_basicsize = sizeof(ASCIIDTypeObject), + .tp_new = asciidtype_new, + .tp_dealloc = (destructor)asciidtype_dealloc, + .tp_repr = (reprfunc)asciidtype_repr, + .tp_str = (reprfunc)asciidtype_repr, + .tp_members = ASCIIDType_members, + }}, + /* rest, filled in during DTypeMeta initialization */ +}; + +int +init_ascii_dtype(void) +{ + PyArrayDTypeMeta_Spec ASCIIDType_DTypeSpec = { + .flags = NPY_DT_PARAMETRIC, + .typeobj = ASCIIScalar_Type, + .slots = ASCIIDType_Slots, + }; + /* Loaded dynamically, so may need to be set here: */ + ((PyObject *)&ASCIIDType)->ob_type = &PyArrayDTypeMeta_Type; + ((PyTypeObject *)&ASCIIDType)->tp_base = &PyArrayDescr_Type; + if (PyType_Ready((PyTypeObject *)&ASCIIDType) < 0) { + return -1; + } + + if (PyArrayInitDTypeMeta_FromSpec(&ASCIIDType, &ASCIIDType_DTypeSpec) < + 0) { + return -1; + } + + ASCIIDType.singleton = PyArray_GetDefaultDescr(&ASCIIDType); + + return 0; +} diff --git a/asciidtype/asciidtype/src/dtype.h b/asciidtype/asciidtype/src/dtype.h new file mode 100644 index 00000000..232c6e63 --- /dev/null +++ b/asciidtype/asciidtype/src/dtype.h @@ -0,0 +1,30 @@ +#ifndef _NPY_DTYPE_H +#define _NPY_DTYPE_H + +// clang-format off +#include +#include "structmember.h" +// clang-format on + +#define PY_ARRAY_UNIQUE_SYMBOL asciidtype_ARRAY_API +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#define NO_IMPORT_ARRAY +#include "numpy/arrayobject.h" +#include "numpy/experimental_dtype_api.h" +#include "numpy/ndarraytypes.h" + +typedef struct { + PyArray_Descr base; + long size; +} ASCIIDTypeObject; + +extern PyArray_DTypeMeta ASCIIDType; +extern PyTypeObject *ASCIIScalar_Type; + +ASCIIDTypeObject * +new_asciidtype_instance(PyObject *size); + +int +init_ascii_dtype(void); + +#endif /*_NPY_DTYPE_H*/ diff --git a/asciidtype/asciidtype/src/umath.c b/asciidtype/asciidtype/src/umath.c new file mode 100644 index 00000000..6c19ba4d 
--- /dev/null +++ b/asciidtype/asciidtype/src/umath.c @@ -0,0 +1,12 @@ +#include + +#define PY_ARRAY_UNIQUE_SYMBOL asciidtype_ARRAY_API +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#define NO_IMPORT_ARRAY +#include "numpy/arrayobject.h" +#include "numpy/experimental_dtype_api.h" +#include "numpy/ndarraytypes.h" +#include "numpy/ufuncobject.h" + +#include "dtype.h" +#include "umath.h" diff --git a/asciidtype/asciidtype/src/umath.h b/asciidtype/asciidtype/src/umath.h new file mode 100644 index 00000000..d000955d --- /dev/null +++ b/asciidtype/asciidtype/src/umath.h @@ -0,0 +1,4 @@ +#ifndef _NPY_UFUNC_H +#define _NPY_UFUNC_H + +#endif /*_NPY_UFUNC_H */ diff --git a/asciidtype/meson.build b/asciidtype/meson.build new file mode 100644 index 00000000..2b566186 --- /dev/null +++ b/asciidtype/meson.build @@ -0,0 +1,48 @@ +project( + 'asciidtype', + 'c', +) + +py_mod = import('python') +py = py_mod.find_installation() + +incdir_numpy = run_command(py, + [ + '-c', + 'import numpy; print(numpy.get_include())' + ], + check: true +).stdout().strip() + +includes = include_directories( + [ + incdir_numpy, + 'asciidtype/src' + ] +) + +srcs = [ + 'asciidtype/src/casts.c', + 'asciidtype/src/casts.h', + 'asciidtype/src/dtype.c', + 'asciidtype/src/asciidtype_main.c', + 'asciidtype/src/umath.c', + 'asciidtype/src/umath.h', +] + +py.install_sources( + [ + 'asciidtype/__init__.py', + 'asciidtype/scalar.py' + ], + subdir: 'asciidtype' +) + +py.extension_module( + '_asciidtype_main', + srcs, + c_args: ['-g', '-O0'], + install: true, + subdir: 'asciidtype', + include_directories: includes +) diff --git a/asciidtype/pyproject.toml b/asciidtype/pyproject.toml new file mode 100644 index 00000000..ae1593be --- /dev/null +++ b/asciidtype/pyproject.toml @@ -0,0 +1,20 @@ +[build-system] +requires = [ + "meson>=0.63.0", + "meson-python", + "patchelf", + "wheel", + "numpy", +] +build-backend = "mesonpy" + +[project] +name = "asciidtype" +description = "A dtype for ASCII data" +version = 
"0.0.1" +readme = 'README.md' +author = "Nathan Goldbaum" +requires-python = ">=3.9.0" +dependencies = [ + "numpy", +] diff --git a/asciidtype/tests/conftest.py b/asciidtype/tests/conftest.py new file mode 100644 index 00000000..f5870e45 --- /dev/null +++ b/asciidtype/tests/conftest.py @@ -0,0 +1,3 @@ +import os + +os.environ["NUMPY_EXPERIMENTAL_DTYPE_API"] = "1" diff --git a/asciidtype/tests/test_asciidtype.py b/asciidtype/tests/test_asciidtype.py new file mode 100644 index 00000000..4055062e --- /dev/null +++ b/asciidtype/tests/test_asciidtype.py @@ -0,0 +1,11 @@ +from asciidtype import ASCIIDType, ASCIIScalar + + +def test_dtype_creation(): + dtype = ASCIIDType(4) + assert str(dtype) == "ASCIIDType(4)" + + +def test_scalar_creation(): + dtype = ASCIIDType(5) + ASCIIScalar('string', dtype) From 1de1a14216c72654598b7b5afdc969f220172c6d Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Mon, 5 Dec 2022 13:03:00 -0700 Subject: [PATCH 3/9] add compilation database for asciidtype to pre-commit hook --- .pre-commit-config.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2cdd34cb..23e40176 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,6 +9,14 @@ repos: entry: | bash -c 'cd metadatadtype && mkdir -p build && pip install build meson-python patchelf wheel && python -m build --wheel --no-isolation -Cbuilddir=build'; fail_fast: false + - id: generate-compilation-database-asciidtype + name: Generate compilation database [asciidtype] + files: asciidtype/(meson\.build$|.*\.(c|h)$) + language: system + require_serial: true + entry: | + bash -c 'cd asciidtype && mkdir -p build && pip install build meson-python patchelf wheel && python -m build --wheel --no-isolation -Cbuilddir=build'; + fail_fast: false - id: generate-compilation-database-quaddtype name: Generate compilation database [quaddtype] files: quaddtype/(meson\.build$|.*\.(c|h)$) From 
a907087675d175d51b7898404ff608496762eb40 Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Tue, 6 Dec 2022 12:13:15 -0700 Subject: [PATCH 4/9] ASCIIDType creation works --- asciidtype/asciidtype/src/casts.c | 169 ++++++++++++++++++++++++++++ asciidtype/asciidtype/src/casts.h | 2 + asciidtype/asciidtype/src/dtype.c | 20 +++- asciidtype/tests/test_asciidtype.py | 2 +- 4 files changed, 186 insertions(+), 7 deletions(-) diff --git a/asciidtype/asciidtype/src/casts.c b/asciidtype/asciidtype/src/casts.c index cec1fcc6..8e90cd48 100644 --- a/asciidtype/asciidtype/src/casts.c +++ b/asciidtype/asciidtype/src/casts.c @@ -9,3 +9,172 @@ #include "casts.h" #include "dtype.h" + +static NPY_CASTING +ascii_to_ascii_resolve_descriptors(PyObject *NPY_UNUSED(self), + PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]), + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2], + npy_intp *view_offset) +{ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + if (given_descrs[1] == NULL) { + Py_INCREF(given_descrs[0]); + loop_descrs[1] = given_descrs[0]; + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + if (((ASCIIDTypeObject *)loop_descrs[0])->size == + ((ASCIIDTypeObject *)loop_descrs[1])->size) { + *view_offset = 0; + return NPY_NO_CASTING; + } + + return NPY_SAME_KIND_CASTING; +} + +static int +ascii_to_ascii_contiguous(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], + npy_intp const NPY_UNUSED(strides[]), + NpyAuxData *NPY_UNUSED(auxdata)) +{ + PyArray_Descr **descrs = context->descriptors; + // for contiguous assignment the sizes of the two dtypes should be + // the same, consider adding an assert to check? 
+ long size = ((ASCIIDTypeObject *)descrs[0])->size; + + npy_intp N = dimensions[0] * size; + char *in = data[0]; + char *out = data[1]; + + while (N--) { + *out = *in; + out++; + in++; + } + + return 0; +} + +static int +ascii_to_ascii_strided(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *NPY_UNUSED(auxdata)) +{ + PyArray_Descr **descrs = context->descriptors; + long in_size = ((ASCIIDTypeObject *)descrs[0])->size; + long out_size = ((ASCIIDTypeObject *)descrs[1])->size; + long copy_size; + + if (out_size > in_size) { + copy_size = in_size; + } + else { + copy_size = out_size; + } + + npy_intp N = dimensions[0]; + char *in = data[0]; + char *out = data[1]; + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + while (N--) { + for (int i = 0; i < copy_size; i++) { + *(out + i) = *(in + i); + } + for (int i = copy_size; i < out_size; i++) { + *(out + i) = '\0'; + } + in += in_stride; + out += out_stride; + } + + return 0; +} + +static int +ascii_to_ascii_unaligned(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + NpyAuxData *NPY_UNUSED(auxdata)) +{ + PyArray_Descr **descrs = context->descriptors; + long in_size = ((ASCIIDTypeObject *)descrs[0])->size; + long out_size = ((ASCIIDTypeObject *)descrs[1])->size; + long copy_size; + + if (out_size > in_size) { + copy_size = in_size; + } + else { + copy_size = out_size; + } + + npy_intp N = dimensions[0]; + char *in = data[0]; + char *out = data[1]; + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + while (N--) { + memcpy(out, in, copy_size * sizeof(char)); // NOLINT + for (int i = copy_size; i < out_size; i++) { + *(out + i) = '\0'; + } + in += in_stride; + out += out_stride; + } + + return 0; +} + +static int +ascii_to_ascii_get_loop(PyArrayMethod_Context *context, int aligned, + int NPY_UNUSED(move_references), + const npy_intp *strides, + 
PyArrayMethod_StridedLoop **out_loop, + NpyAuxData **NPY_UNUSED(out_transferdata), + NPY_ARRAYMETHOD_FLAGS *flags) +{ + PyArray_Descr **descrs = context->descriptors; + + long in_size = ((ASCIIDTypeObject *)descrs[0])->size; + long out_size = ((ASCIIDTypeObject *)descrs[1])->size; + int contig = (in_size == out_size && strides[0] == in_size && + strides[1] == out_size); + + if (aligned && contig) { + *out_loop = (PyArrayMethod_StridedLoop *)&ascii_to_ascii_contiguous; + } + else if (aligned) { + *out_loop = (PyArrayMethod_StridedLoop *)&ascii_to_ascii_strided; + } + else { + *out_loop = (PyArrayMethod_StridedLoop *)&ascii_to_ascii_unaligned; + } + + *flags = 0; + return 0; +} + +static PyArray_DTypeMeta *a2a_dtypes[2] = {NULL, NULL}; + +static PyType_Slot a2a_slots[] = { + {NPY_METH_resolve_descriptors, &ascii_to_ascii_resolve_descriptors}, + {_NPY_METH_get_loop, &ascii_to_ascii_get_loop}, + {0, NULL}}; + +PyArrayMethod_Spec ASCIIToASCIICastSpec = { + .name = "cast_ASCIIDType_to_ASCIIDType", + .nin = 1, + .nout = 1, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .casting = NPY_SAME_KIND_CASTING, + .dtypes = a2a_dtypes, + .slots = a2a_slots, +}; diff --git a/asciidtype/asciidtype/src/casts.h b/asciidtype/asciidtype/src/casts.h index af8ca056..f403fe8c 100644 --- a/asciidtype/asciidtype/src/casts.h +++ b/asciidtype/asciidtype/src/casts.h @@ -10,4 +10,6 @@ #include "numpy/experimental_dtype_api.h" #include "numpy/ndarraytypes.h" +extern PyArrayMethod_Spec ASCIIToASCIICastSpec; + #endif /* _NPY_CASTS_H */ diff --git a/asciidtype/asciidtype/src/dtype.c b/asciidtype/asciidtype/src/dtype.c index 260d287d..118a79d2 100644 --- a/asciidtype/asciidtype/src/dtype.c +++ b/asciidtype/asciidtype/src/dtype.c @@ -161,13 +161,12 @@ asciidtype_new(PyTypeObject *NPY_UNUSED(cls), PyObject *args, PyObject *kwds) return NULL; } if (size == NULL) { - PyErr_SetString( - PyExc_TypeError, - "Must provide a size to instantiate an ASCIIDType instance"); - return NULL; + size = PyLong_FromLong(0); } - 
return (PyObject 
*)new_asciidtype_instance(size); + PyObject *ret = (PyObject *)new_asciidtype_instance(size); + Py_DECREF(size); + return ret; } static void @@ -212,10 +211,13 @@ PyArray_DTypeMeta ASCIIDType = { int init_ascii_dtype(void) { + static PyArrayMethod_Spec *casts[] = {&ASCIIToASCIICastSpec, NULL}; + PyArrayDTypeMeta_Spec ASCIIDType_DTypeSpec = { .flags = NPY_DT_PARAMETRIC, .typeobj = ASCIIScalar_Type, .slots = ASCIIDType_Slots, + .casts = casts, }; /* Loaded dynamically, so may need to be set here: */ ((PyObject *)&ASCIIDType)->ob_type = &PyArrayDTypeMeta_Type; @@ -229,7 +231,13 @@ init_ascii_dtype(void) return -1; } - ASCIIDType.singleton = PyArray_GetDefaultDescr(&ASCIIDType); + PyArray_Descr *singleton = PyArray_GetDefaultDescr(&ASCIIDType); + + if (singleton == NULL) { + return -1; + } + + ASCIIDType.singleton = singleton; return 0; } diff --git a/asciidtype/tests/test_asciidtype.py b/asciidtype/tests/test_asciidtype.py index 4055062e..290fdd2f 100644 --- a/asciidtype/tests/test_asciidtype.py +++ b/asciidtype/tests/test_asciidtype.py @@ -7,5 +7,5 @@ def test_dtype_creation(): def test_scalar_creation(): - dtype = ASCIIDType(5) + dtype = ASCIIDType(7) ASCIIScalar('string', dtype) From fd500a60a6add777bfc63f30e0a76984af850662 Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Tue, 6 Dec 2022 13:19:36 -0700 Subject: [PATCH 5/9] add asciidtype tests to CI run --- .github/workflows/ci.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d2afb7dc..e6f44116 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,6 +20,12 @@ jobs: run: | pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy python -m pip install -U pip build pytest unyt wheel meson ninja meson-python patchelf + - name: Install asciidtype + run: | + CFLAGS="-Werror" python -m pip install . 
--no-build-isolation + - name: Run asciidtype tests + run: | + pytest -vvv --color=yes - name: Install metadatadtype working-directory: metadatadtype run: | From c6f026280e5f28d421c2f68a4dc3ad5394005ddc Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Tue, 6 Dec 2022 13:22:00 -0700 Subject: [PATCH 6/9] add missing decref --- asciidtype/asciidtype/src/dtype.c | 1 + 1 file changed, 1 insertion(+) diff --git a/asciidtype/asciidtype/src/dtype.c b/asciidtype/asciidtype/src/dtype.c index 118a79d2..8495eb32 100644 --- a/asciidtype/asciidtype/src/dtype.c +++ b/asciidtype/asciidtype/src/dtype.c @@ -21,6 +21,7 @@ get_value(PyObject *scalar) PyObject *res_bytes = PyUnicode_AsASCIIString(value); Py_DECREF(value); char *res = PyBytes_AsString(res_bytes); + Py_DECREF(res_bytes); if (res == NULL) { return NULL; } From d34c65668d2dce4e025ce2f385de9d04155230f5 Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Tue, 6 Dec 2022 14:16:10 -0700 Subject: [PATCH 7/9] array creation works --- asciidtype/asciidtype/scalar.py | 5 ++- asciidtype/asciidtype/src/dtype.c | 65 +++++++++++++++++++---------- asciidtype/tests/test_asciidtype.py | 9 ++++ 3 files changed, 56 insertions(+), 23 deletions(-) diff --git a/asciidtype/asciidtype/scalar.py b/asciidtype/asciidtype/scalar.py index c28d0261..1a39780f 100644 --- a/asciidtype/asciidtype/scalar.py +++ b/asciidtype/asciidtype/scalar.py @@ -6,5 +6,8 @@ def __init__(self, value, dtype): self.value = value self.dtype = dtype + def __str__(self): + return str(self.value) + def __repr__(self): - return f"{self.value}" + return repr(self.value) diff --git a/asciidtype/asciidtype/src/dtype.c b/asciidtype/asciidtype/src/dtype.c index 8495eb32..fc550dff 100644 --- a/asciidtype/asciidtype/src/dtype.c +++ b/asciidtype/asciidtype/src/dtype.c @@ -4,28 +4,34 @@ PyTypeObject *ASCIIScalar_Type = NULL; -static char * +static PyObject * get_value(PyObject *scalar) { + PyObject *ret_bytes = NULL; PyTypeObject *scalar_type = Py_TYPE(scalar); - if (scalar_type 
!= ASCIIScalar_Type) { - PyErr_SetString(PyExc_TypeError, - "Can only store ASCIIScalar in a ASCIIDType array."); - return NULL; + if (scalar_type == &PyUnicode_Type) { + // attempt to decode as ASCII + ret_bytes = PyUnicode_AsASCIIString(scalar); + if (ret_bytes == NULL) { + PyErr_SetString( + PyExc_TypeError, + "Can only store ASCII text in a ASCIIDType array."); + } } - - PyObject *value = PyObject_GetAttrString(scalar, "value"); - if (value == NULL) { + else if (scalar_type != ASCIIScalar_Type) { + PyErr_SetString(PyExc_TypeError, + "Can only store ASCII text in a ASCIIDType array."); return NULL; } - PyObject *res_bytes = PyUnicode_AsASCIIString(value); - Py_DECREF(value); - char *res = PyBytes_AsString(res_bytes); - Py_DECREF(res_bytes); - if (res == NULL) { - return NULL; + else { + PyObject *value = PyObject_GetAttrString(scalar, "value"); + if (value == NULL) { + return NULL; + } + ret_bytes = PyUnicode_AsASCIIString(value); + Py_DECREF(value); } - return res; + return ret_bytes; } /* @@ -100,12 +106,31 @@ ascii_discover_descriptor_from_pyobject(PyArray_DTypeMeta *NPY_UNUSED(cls), static int asciidtype_setitem(ASCIIDTypeObject *descr, PyObject *obj, char *dataptr) { - char *value = get_value(obj); + PyObject *value = get_value(obj); if (value == NULL) { return -1; } - memcpy(dataptr, value, descr->size * sizeof(char)); // NOLINT + Py_ssize_t len = PyBytes_Size(value); + + size_t copysize; + + if (len > descr->size) { + copysize = descr->size; + } + else { + copysize = len; + } + + char *char_value = PyBytes_AsString(value); + + memcpy(dataptr, char_value, copysize * sizeof(char)); // NOLINT + + for (int i = copysize; i < descr->size; i++) { + dataptr[i] = '\0'; + } + + Py_DECREF(value); return 0; } @@ -113,11 +138,7 @@ asciidtype_setitem(ASCIIDTypeObject *descr, PyObject *obj, char *dataptr) static PyObject * asciidtype_getitem(ASCIIDTypeObject *descr, char *dataptr) { - char *val = NULL; - /* get the value */ - memcpy(val, dataptr, descr->size * 
sizeof(char)); // NOLINT - - PyObject *val_obj = PyUnicode_FromStringAndSize(val, descr->size); + PyObject *val_obj = PyUnicode_FromString(dataptr); if (val_obj == NULL) { return NULL; } diff --git a/asciidtype/tests/test_asciidtype.py b/asciidtype/tests/test_asciidtype.py index 290fdd2f..210bd337 100644 --- a/asciidtype/tests/test_asciidtype.py +++ b/asciidtype/tests/test_asciidtype.py @@ -1,3 +1,5 @@ +import numpy as np + from asciidtype import ASCIIDType, ASCIIScalar @@ -9,3 +11,10 @@ def test_dtype_creation(): def test_scalar_creation(): dtype = ASCIIDType(7) ASCIIScalar('string', dtype) + + +def test_creation_with_explicit_dtype(): + dtype = ASCIIDType(7) + arr = np.array(["hello", "this", "is", "an", "array"], dtype=dtype) + assert repr(arr) == ( + "array(['hello', 'this', 'is', 'an', 'array'], dtype=ASCIIDType(7))") From 808970fe7df2c24812c855deb9829a7f57f154a3 Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Tue, 6 Dec 2022 14:21:09 -0700 Subject: [PATCH 8/9] fix github actions workflow --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e6f44116..721931ad 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,9 +21,11 @@ jobs: pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy python -m pip install -U pip build pytest unyt wheel meson ninja meson-python patchelf - name: Install asciidtype + working-directory: asciidtype run: | CFLAGS="-Werror" python -m pip install . 
--no-build-isolation - name: Run asciidtype tests + working-directory: asciidtype run: | pytest -vvv --color=yes - name: Install metadatadtype From 6be83002116042cd1d0552b0de295ebf4fcd22b3 Mon Sep 17 00:00:00 2001 From: Nathan Goldbaum Date: Wed, 7 Dec 2022 10:26:14 -0700 Subject: [PATCH 9/9] only have one loop for unaligned or strided casts --- asciidtype/asciidtype/src/casts.c | 51 +++++-------------------------- 1 file changed, 7 insertions(+), 44 deletions(-) diff --git a/asciidtype/asciidtype/src/casts.c b/asciidtype/asciidtype/src/casts.c index 8e90cd48..61e5ac01 100644 --- a/asciidtype/asciidtype/src/casts.c +++ b/asciidtype/asciidtype/src/casts.c @@ -62,46 +62,11 @@ ascii_to_ascii_contiguous(PyArrayMethod_Context *context, char *const data[], } static int -ascii_to_ascii_strided(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *NPY_UNUSED(auxdata)) -{ - PyArray_Descr **descrs = context->descriptors; - long in_size = ((ASCIIDTypeObject *)descrs[0])->size; - long out_size = ((ASCIIDTypeObject *)descrs[1])->size; - long copy_size; - - if (out_size > in_size) { - copy_size = in_size; - } - else { - copy_size = out_size; - } - - npy_intp N = dimensions[0]; - char *in = data[0]; - char *out = data[1]; - npy_intp in_stride = strides[0]; - npy_intp out_stride = strides[1]; - - while (N--) { - for (int i = 0; i < copy_size; i++) { - *(out + i) = *(in + i); - } - for (int i = copy_size; i < out_size; i++) { - *(out + i) = '\0'; - } - in += in_stride; - out += out_stride; - } - - return 0; -} - -static int -ascii_to_ascii_unaligned(PyArrayMethod_Context *context, char *const data[], - npy_intp const dimensions[], npy_intp const strides[], - NpyAuxData *NPY_UNUSED(auxdata)) +ascii_to_ascii_strided_or_unaligned(PyArrayMethod_Context *context, + char *const data[], + npy_intp const dimensions[], + npy_intp const strides[], + NpyAuxData *NPY_UNUSED(auxdata)) { PyArray_Descr **descrs = 
context->descriptors; long in_size = ((ASCIIDTypeObject *)descrs[0])->size; @@ -151,11 +116,9 @@ ascii_to_ascii_get_loop(PyArrayMethod_Context *context, int aligned, if (aligned && contig) { *out_loop = (PyArrayMethod_StridedLoop *)&ascii_to_ascii_contiguous; } - else if (aligned) { - *out_loop = (PyArrayMethod_StridedLoop *)&ascii_to_ascii_strided; - } else { - *out_loop = (PyArrayMethod_StridedLoop *)&ascii_to_ascii_unaligned; + *out_loop = (PyArrayMethod_StridedLoop + *)&ascii_to_ascii_strided_or_unaligned; } *flags = 0;