8000 Add asciidtype by ngoldbaum · Pull Request #10 · numpy/numpy-user-dtypes · GitHub
[go: up one dir, main page]

Skip to content

Add asciidtype #10

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Dec 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@ jobs:
run: |
pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy
python -m pip install -U pip build pytest unyt wheel meson ninja meson-python patchelf
- name: Install asciidtype
working-directory: asciidtype
run: |
CFLAGS="-Werror" python -m pip install . --no-build-isolation
- name: Run asciidtype tests
working-directory: asciidtype
run: |
pytest -vvv --color=yes
- name: Install metadatadtype
working-directory: metadatadtype
run: |
Expand Down
8 changes: 8 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@ repos:
entry: |
bash -c 'cd metadatadtype && mkdir -p build && pip install build meson-python patchelf wheel && python -m build --wheel --no-isolation -Cbuilddir=build';
fail_fast: false
- id: generate-compilation-database-asciidtype
name: Generate compilation database [asciidtype]
files: asciidtype/(meson\.build$|.*\.(c|h)$)
language: system
require_serial: true
entry: |
bash -c 'cd asciidtype && mkdir -p build && pip install build meson-python patchelf wheel && python -m build --wheel --no-isolation -Cbuilddir=build';
fail_fast: false
- id: generate-compilation-database-quaddtype
name: Generate compilation database [quaddtype]
files: quaddtype/(meson\.build$|.*\.(c|h)$)
Expand Down
37 changes: 37 additions & 0 deletions asciidtype/.clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# A clang-format style that approximates Python's PEP 7
# Useful for IDE integration
#
# Based on Paul Ganssle's version at
# https://gist.github.com/pganssle/0e3a5f828b4d07d79447f6ced8e7e4db
# and modified for NumPy
BasedOnStyle: Google
AlignAfterOpenBracket: Align
AllowShortEnumsOnASingleLine: false
AllowShortIfStatementsOnASingleLine: false
AlwaysBreakAfterReturnType: TopLevel
BreakBeforeBraces: Stroustrup
ColumnLimit: 79
ContinuationIndentWidth: 8
DerivePointerAlignment: false
IndentWidth: 4
IncludeBlocks: Regroup
IncludeCategories:
- Regex: '^[<"](Python|structmember|pymem)\.h'
Priority: -3
CaseSensitive: true
- Regex: '^"numpy/'
Priority: -2
- Regex: '^"(npy_pycompat|npy_config)'
Priority: -1
- Regex: '^"[[:alnum:]_.]+"'
Priority: 1
- Regex: '^<[[:alnum:]_.]+>'
Priority: 2
Language: Cpp
PointerAlignment: Right
ReflowComments: true
SpaceBeforeParens: ControlStatements
SpacesInParentheses: false
StatementMacros: [PyObject_HEAD, PyObject_VAR_HEAD, PyObject_HEAD_EXTRA]
TabWidth: 4
UseTab: Never
2 changes: 2 additions & 0 deletions asciidtype/.flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[flake8]
per-file-ignores = __init__.py:F401
3 changes: 3 additions & 0 deletions asciidtype/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
dist/
.mesonpy*.ini
__pycache__
27 changes: 27 additions & 0 deletions asciidtype/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# A dtype that stores ASCII data

This is a simple proof-of-concept dtype using the (as of late 2022) experimental
[new dtype
implementation](https://numpy.org/neps/nep-0041-improved-dtype-support.html) in
NumPy.

## Building

Ensure Meson and NumPy are installed in the Python environment you would like to use:

```
$ python3 -m pip install meson meson-python numpy build patchelf
```

Build with Meson, create a wheel, and install it:

```
$ rm -r dist/
$ meson build
$ python -m build --wheel -Cbuilddir=build
$ python -m pip install dist/asciidtype*.whl
```

The `mesonpy` build backend for pip [does not currently support editable
installs](https://github.com/mesonbuild/meson-python/issues/47), so `pip install
-e .` will not work.
10 changes: 10 additions & 0 deletions asciidtype/asciidtype/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""A dtype for working with ASCII data

This is an example usage of the experimental new dtype API
in Numpy and is not intended for any real purpose.
"""

from .scalar import ASCIIScalar # isort: skip
from ._asciidtype_main import ASCIIDType

__all__ = ["ASCIIDType", "ASCIIScalar"]
13 changes: 13 additions & 0 deletions asciidtype/asciidtype/scalar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""A scalar type needed by the dtype machinery."""


class ASCIIScalar:
    """Scalar object that pairs a value with the dtype it is associated with.

    Both ``str()`` and ``repr()`` of an instance are delegated to the
    wrapped value.
    """

    def __init__(self, value, dtype):
        # Both arguments are stored unchanged; no validation happens here.
        self.value, self.dtype = value, dtype

    def __repr__(self):
        """Return ``repr()`` of the wrapped value."""
        wrapped = self.value
        return repr(wrapped)

    def __str__(self):
        """Return ``str()`` of the wrapped value."""
        wrapped = self.value
        return str(wrapped)
57 changes: 57 additions & 0 deletions asciidtype/asciidtype/src/asciidtype_main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#include <Python.h>

#define PY_ARRAY_UNIQUE_SYMBOL asciidtype_ARRAY_API
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include "numpy/arrayobject.h"
#include "numpy/experimental_dtype_api.h"

#include "dtype.h"

/*
 * Definition of the extension module created by PyInit__asciidtype_main().
 * No method table is supplied; the module only exposes what the init
 * function adds to it.
 */
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
/* NOTE(review): the init function is named for `_asciidtype_main` (leading
 * underscore) while this name has none -- confirm this is intentional. */
.m_name = "asciidtype_main",
/* -1: no per-module state struct is allocated (see the CPython
 * PyModuleDef.m_size documentation for the implications). */
.m_size = -1,
};

/* Module initialization function */
PyMODINIT_FUNC
PyInit__asciidtype_main(void)
{
if (_import_array() < 0) {
return NULL;
}
if (import_experimental_dtype_api(5) < 0) {
return NULL;
}

PyObject *m = PyModule_Create(&moduledef);
if (m == NULL) {
return NULL;
}

PyObject *mod = PyImport_ImportModule("asciidtype");
if (mod == NULL) {
goto error;
}
ASCIIScalar_Type =
(PyTypeObject *)PyObject_GetAttrString(mod, "ASCIIScalar");
Py_DECREF(mod);

if (ASCIIScalar_Type == NULL) {
goto error;
}

if (init_ascii_dtype() < 0) {
goto error;
}

if (PyModule_AddObject(m, "ASCIIDType", (PyObject *)&ASCIIDType) < 0) {
goto error;
}

return m;

error:
Py_DECREF(m);
return NULL;
}
143 changes: 143 additions & 0 deletions asciidtype/asciidtype/src/casts.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#include <Python.h>

#define PY_ARRAY_UNIQUE_SYMBOL asciidtype_ARRAY_API
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#define NO_IMPORT_ARRAY
#include "numpy/arrayobject.h"
#include "numpy/experimental_dtype_api.h"
#include "numpy/ndarraytypes.h"

#include "casts.h"
#include "dtype.h"

/*
 * Resolve the descriptors used for an ASCIIDType -> ASCIIDType cast.
 *
 * given_descrs[0] is always used as the input loop descriptor.  If the
 * caller supplied no output descriptor (given_descrs[1] == NULL) the input
 * descriptor is reused for the output; otherwise the requested output
 * descriptor is used.  Every entry written to loop_descrs holds its own new
 * reference.
 *
 * Returns NPY_NO_CASTING and sets *view_offset to 0 when both loop
 * descriptors have the same `size`; returns NPY_SAME_KIND_CASTING when the
 * sizes differ (*view_offset is left untouched in that case).
 */
static NPY_CASTING
ascii_to_ascii_resolve_descriptors(PyObject *NPY_UNUSED(self),
                                   PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]),
                                   PyArray_Descr *given_descrs[2],
                                   PyArray_Descr *loop_descrs[2],
                                   npy_intp *view_offset)
{
    Py_INCREF(given_descrs[0]);
    loop_descrs[0] = given_descrs[0];

    if (given_descrs[1] == NULL) {
        /* No output descriptor requested: cast to the input descriptor. */
        Py_INCREF(given_descrs[0]);
        loop_descrs[1] = given_descrs[0];
    }
    else {
        /*
         * Store the descriptor whose reference was just taken.  Storing
         * given_descrs[0] here would leak the reference on given_descrs[1],
         * hand out an unowned reference to given_descrs[0], and make the
         * size comparison below always succeed.
         */
        Py_INCREF(given_descrs[1]);
        loop_descrs[1] = given_descrs[1];
    }

    if (((ASCIIDTypeObject *)loop_descrs[0])->size ==
        ((ASCIIDTypeObject *)loop_descrs[1])->size) {
        /* Same element size: the data can be viewed without copying. */
        *view_offset = 0;
        return NPY_NO_CASTING;
    }

    return NPY_SAME_KIND_CASTING;
}

/*
 * Cast loop for the contiguous case.
 *
 * Copies dimensions[0] elements of `size` bytes each from data[0] to
 * data[1] as one flat block.  The element size is taken from the input
 * descriptor only, so this loop is only correct when the input and output
 * descriptors have the same size; whoever selects this loop must guarantee
 * that.  Strides are ignored.
 *
 * Always returns 0.
 */
static int
ascii_to_ascii_contiguous(PyArrayMethod_Context *context, char *const data[],
                          npy_intp const dimensions[],
                          npy_intp const NPY_UNUSED(strides[]),
                          NpyAuxData *NPY_UNUSED(auxdata))
{
    PyArray_Descr **descrs = context->descriptors;
    // for contiguous assignment the sizes of the two dtypes should be
    // the same, consider adding an assert to check?
    long size = ((ASCIIDTypeObject *)descrs[0])->size;

    npy_intp nbytes = dimensions[0] * size;
    char *in = data[0];
    char *out = data[1];

    /* One bulk copy replaces the byte-by-byte loop; skip it entirely when
     * there is nothing to copy so a non-positive count is never converted
     * to size_t. */
    if (nbytes > 0) {
        memcpy(out, in, (size_t)nbytes);
    }

    return 0;
}

/*
 * General cast loop, used for strided and/or unaligned data and whenever
 * the input and output element sizes differ.
 *
 * For each of the dimensions[0] elements, copies min(in_size, out_size)
 * bytes from the input element to the output element and fills any
 * remaining output bytes with '\0'.  Input longer than the output is
 * truncated.  The input and output pointers advance by strides[0] and
 * strides[1] respectively.
 *
 * Always returns 0.
 */
static int
ascii_to_ascii_strided_or_unaligned(PyArrayMethod_Context *context,
                                    char *const data[],
                                    npy_intp const dimensions[],
                                    npy_intp const strides[],
                                    NpyAuxData *NPY_UNUSED(auxdata))
{
    PyArray_Descr **descrs = context->descriptors;
    long in_size = ((ASCIIDTypeObject *)descrs[0])->size;
    long out_size = ((ASCIIDTypeObject *)descrs[1])->size;
    /* Number of bytes that exist in both the input and output element. */
    long copy_size = (out_size > in_size) ? in_size : out_size;

    npy_intp N = dimensions[0];
    char *in = data[0];
    char *out = data[1];
    npy_intp in_stride = strides[0];
    npy_intp out_stride = strides[1];

    while (N--) {
        /*
         * Copy only copy_size bytes: copying out_size bytes would read past
         * the end of the input element whenever the output is wider.
         */
        memcpy(out, in, (size_t)copy_size);
        if (out_size > copy_size) {
            /* Zero-pad the tail of a wider output element. */
            memset(out + copy_size, '\0', (size_t)(out_size - copy_size));
        }
        in += in_stride;
        out += out_stride;
    }

    return 0;
}

/*
 * Select the inner loop for an ASCIIDType -> ASCIIDType cast.
 *
 * The contiguous loop copies the data as one flat block using a single
 * element size, so it is chosen only when all of the following hold:
 *   - the data is aligned,
 *   - the input and output descriptors have the same size, and
 *   - each stride equals its descriptor's element size.
 * In every other case the strided/unaligned loop is used; it handles
 * differing element sizes by truncating or zero-padding.
 *
 * Stores the chosen loop in *out_loop, sets *flags to 0 and returns 0.
 */
static int
ascii_to_ascii_get_loop(PyArrayMethod_Context *context, int aligned,
                        int NPY_UNUSED(move_references),
                        const npy_intp *strides,
                        PyArrayMethod_StridedLoop **out_loop,
                        NpyAuxData **NPY_UNUSED(out_transferdata),
                        NPY_ARRAYMETHOD_FLAGS *flags)
{
    PyArray_Descr **descrs = context->descriptors;
    long in_size = ((ASCIIDTypeObject *)descrs[0])->size;
    long out_size = ((ASCIIDTypeObject *)descrs[1])->size;

    /*
     * Equal sizes are required in addition to matching strides: with
     * different sizes both arrays can still be individually contiguous,
     * but a flat copy would misplace every element after the first.
     */
    int contig = (in_size == out_size &&
                  strides[0] == (npy_intp)(in_size * sizeof(char)) &&
                  strides[1] == (npy_intp)(out_size * sizeof(char)));

    if (aligned && contig) {
        *out_loop = (PyArrayMethod_StridedLoop *)&ascii_to_ascii_contiguous;
    }
    else {
        *out_loop = (PyArrayMethod_StridedLoop
                             *)&ascii_to_ascii_strided_or_unaligned;
    }

    *flags = 0;
    return 0;
}

/*
 * DType classes for the cast's input and output.  Both entries are NULL
 * here; presumably they are filled in elsewhere before the spec is
 * registered -- TODO confirm against the dtype initialization code.
 */
static PyArray_DTypeMeta *a2a_dtypes[2] = {NULL, NULL};

/*
 * Slot table wiring the descriptor resolver and the loop selector defined
 * in this file into the cast; terminated by the {0, NULL} sentinel.
 */
static PyType_Slot a2a_slots[] = {
{NPY_METH_resolve_descriptors, &ascii_to_ascii_resolve_descriptors},
{_NPY_METH_get_loop, &ascii_to_ascii_get_loop},
{0, NULL}};

/*
 * ArrayMethod spec for the ASCIIDType -> ASCIIDType cast: one input, one
 * output, flagged as supporting unaligned data, with same-kind casting
 * declared on the spec.  Non-static so other translation units can
 * reference it.
 */
PyArrayMethod_Spec ASCIIToASCIICastSpec = {
.name = "cast_ASCIIDType_to_ASCIIDType",
.nin = 1,
.nout = 1,
.flags = NPY_METH_SUPPORTS_UNALIGNED,
.casting = NPY_SAME_KIND_CASTING,
.dtypes = a2a_dtypes,
.slots = a2a_slots,
};
15 changes: 15 additions & 0 deletions asciidtype/asciidtype/src/casts.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#ifndef _NPY_CASTS_H
#define _NPY_CASTS_H

#include <Python.h>

#define PY_ARRAY_UNIQUE_SYMBOL asciidtype_ARRAY_API
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#define NO_IMPORT_ARRAY
#include "numpy/arrayobject.h"
#include "numpy/experimental_dtype_api.h"
#include "numpy/ndarraytypes.h"

/* ArrayMethod spec for the ASCIIDType -> ASCIIDType cast; defined in
 * casts.c. */
extern PyArrayMethod_Spec ASCIIToASCIICastSpec;

#endif /* _NPY_CASTS_H */
Loading
0