8000 MAINT,API: Make c_metadata, fields, and names only exist on old-style… · numpy/numpy@2c95083 · GitHub
[go: up one dir, main page]

Skip to content

Commit 2c95083

Browse files
authored
MAINT,API: Make c_metadata, fields, and names only exist on old-style dtypes (#25802)
This commit adds accessor inline functions for the additional dtypes fields metadata, c_metadata, fields, and names and internally removes them from the descriptor struct requiring a cast to `_PyArray_LegacyDescr` to access (and especially modify) those fields. The mental model should be that all legacy DTypes inherit from a DType subclass which extends the normal one. It does not yet change the public API: this will happen in a follow up.
1 parent 15691c3 commit 2c95083

36 files changed

+514
-323
lines changed

doc/source/reference/c-api/array.rst

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -753,6 +753,34 @@ General check of Python Type
753753
:c:data:`PyArray_Type` whose dimensionality is 0.
754754
755755
756+
Data-type accessors
757+
~~~~~~~~~~~~~~~~~~~
758+
759+
Some of the descriptor attributes may not always be defined and should not or
760+
cannot be accessed directly.
761+
762+
.. versionchanged:: 2.0
763+
Prior to NumPy 2.0 the ABI was different but unnecessarily large for user
764+
DTypes. These accessors were all added in 2.0.
765+
766+
.. c:function:: PyObject *PyDataType_METADATA(PyArray_Descr *descr)
767+
The Metadata attached to a dtype, either ``NULL`` or a dictionary.
768+
769+
.. c:function:: PyObject *PyDataType_NAMES(PyArray_Descr *descr)
770+
``NULL`` or a tuple of structured field names attached to a dtype;
771+
this tuple should not be mutated, as NumPy may change the way fields are
772+
stored in the future.
773+
774+
.. c:function:: PyObject *PyDataType_FIELDS(PyArray_Descr *descr)
775+
``NULL``, ``None``, or a dict of structured dtype fields; this dict must
776+
not be mutated, as NumPy may change the way fields are stored in the future.
777+
778+
.. c:function:: NpyAuxData *PyDataType_C_METADATA(PyArray_Descr *descr)
779+
C-metadata object attached to a descriptor. This accessor should not
780+
be needed usually. The C-Metadata field does provide access to the
781+
datetime/timedelta time unit information.
782+
783+
756784
Data-type checking
757785
~~~~~~~~~~~~~~~~~~
758786

doc/source/release/2.0.0-notes.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,17 @@ for simpler backporting.
473473
(`gh-25866 <https://github.com/numpy/numpy/pull/25866>`__)
474474

475475

476+
Structured dtype information access through functions
477+
-----------------------------------------------------
478+
The dtype structure fields ``c_metadata``, ``names``,
479+
``fields``, and ``subarray`` must now be accessed through new
480+
functions following the same names, such as ``PyDataType_NAMES``.
481+
Direct access of the fields is not valid as they do not exist for
482+
all ``PyArray_Descr`` instances.
483+
The ``metadata`` field is kept, but the macro version should also be preferred.
484+
485+
(`gh-25802 <https://github.com/numpy/numpy/pull/25802>`__)
486+
476487
NumPy 2.0 C API removals
477488
========================
478489

@@ -552,6 +563,12 @@ NumPy 2.0 C API removals
552563

553564
(`gh-25292 <https://github.com/numpy/numpy/pull/25292>`__)
554565

566+
567+
* ``PyDataType_GetDatetimeMetaData`` has been removed, it did not actually
568+
do anything since at least NumPy 1.7.
569+
570+
(`gh-25802 <https://github.com/numpy/numpy/pull/25802>`__)
571+
555572
``PyArray_GetCastFunc`` was removed
556573
-----------------------------------
557574

numpy/_core/include/numpy/arrayscalars.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,12 @@ typedef struct {
150150
typedef struct {
151151
PyObject_VAR_HEAD
152152
char *obval;
153+
#if defined(NPY_INTERNAL_BUILD) && NPY_INTERNAL_BUILD
154+
/* Internally use the subclass to allow accessing names/fields */
155+
_PyArray_LegacyDescr *descr;
156+
#else
153157
PyArray_Descr *descr;
158+
#endif
154159
int flags;
155160
PyObject *base;
156161
#if NPY_FEATURE_VERSION >= NPY_1_20_API_VERSION

numpy/_core/include/numpy/ndarraytypes.h

Lines changed: 83 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,10 @@ typedef struct {
573573
NPY_ITEM_IS_POINTER | NPY_ITEM_REFCOUNT | \
574574
NPY_NEEDS_INIT | NPY_NEEDS_PYAPI)
575575

576+
#if !(defined(NPY_INTERNAL_BUILD) && NPY_INTERNAL_BUILD)
577+
/*
578+
* Public version of the Descriptor struct
579+
*/
576580
typedef struct _PyArray_Descr {
577581
PyObject_HEAD
578582
/*
@@ -633,6 +637,55 @@ typedef struct _PyArray_Descr {
633637

634638
} PyArray_Descr;
635639

640+
#else /* internal build */
641+
642+
// TODO: This split definition only exists for piece-meal transitioning
643+
// as it allows changing internal use without worrying about the public API.
644+
typedef struct _PyArray_Descr {
645+
PyObject_HEAD
646+
PyTypeObject *typeobj;
647+
char kind;
648+
char type;
649+
char byteorder;
650+
char flags;
651+
int type_num;
652+
int elsize;
653+
int alignment;
654+
/* except hash, the below fields will be legacy descriptor specific */
655+
struct _arr_descr *unreachable_subarray;
656+
PyObject *unreachable_fields;
657+
PyObject *unreachable_names;
658+
PyArray_ArrFuncs *_former_f;
659+
PyObject *metadata;
660+
NpyAuxData *unreachable_c_metadata;
661+
npy_hash_t hash;
662+
} PyArray_Descr;
663+
664+
#endif /* internal build */
665+
666+
667+
/*
668+
* Semi-private struct with the additional fields of legacy descriptors (must
669+
* check NPY_DT_is_legacy before casting/accessing).
670+
*/
671+
typedef struct {
672+
PyObject_HEAD
673+
PyTypeObject *typeobj;
674+
char kind;
675+
char type;
676+
char byteorder;
677+
char flags;
678+
int type_num;
679+
int elsize;
680+
int alignment;
681+
struct _arr_descr *subarray;
682+
PyObject *fields;
683+
PyObject *names;
684+
PyArray_ArrFuncs *_former_f;
685+
PyObject *metadata;
686+
NpyAuxData *c_metadata;
687+
npy_hash_t hash;
688+
} _PyArray_LegacyDescr;
636689

637690

638691
/*
@@ -1620,6 +1673,7 @@ PyArray_CLEARFLAGS(PyArrayObject *arr, int flags)
16201673
#define PyTypeNum_ISOBJECT(type) ((type) == NPY_OBJECT)
16211674

16221675

1676+
#define PyDataType_ISLEGACY(dtype) ((dtype)->type_num < NPY_VSTRING && ((dtype)->type_num >= 0))
16231677
#define PyDataType_ISBOOL(obj) PyTypeNum_ISBOOL(((PyArray_Descr*)(obj))->type_num)
16241678
#define PyDataType_ISUNSIGNED(obj) PyTypeNum_ISUNSIGNED(((PyArray_Descr*)(obj))->type_num)
16251679
#define PyDataType_ISSIGNED(obj) PyTypeNum_ISSIGNED(((PyArray_Descr*)(obj))->type_num)
@@ -1633,8 +1687,8 @@ PyArray_CLEARFLAGS(PyArrayObject *arr, int flags)
16331687
#define PyDataType_ISUSERDEF(obj) PyTypeNum_ISUSERDEF(((PyArray_Descr*)(obj))->type_num)
16341688
#define PyDataType_ISEXTENDED(obj) PyTypeNum_ISEXTENDED(((PyArray_Descr*)(obj))->type_num)
16351689
#define PyDataType_ISOBJECT(obj) PyTypeNum_ISOBJECT(((PyArray_Descr*)(obj))->type_num)
1636-
#define PyDataType_HASFIELDS(obj) (((PyArray_Descr *)(obj))->names != NULL)
1637-
#define PyDataType_HASSUBARRAY(dtype) ((dtype)->subarray != NULL)
1690+
#define PyDataType_HASFIELDS(obj) (PyDataType_ISLEGACY((PyArray_Descr*)(obj)) && ((_PyArray_LegacyDescr *)(obj))->names != NULL)
1691+
/*
 * Evaluates to non-zero when `dtype` is a legacy descriptor (as decided by
 * PyDataType_ISLEGACY) whose `subarray` slot is not NULL.
 *
 * The argument is parenthesized before the cast so that an expression
 * argument (e.g. `cond ? a : b`) is cast as a whole rather than only its
 * first operand.  Note that `dtype` is evaluated more than once, so it must
 * not have side effects.
 */
#define PyDataType_HASSUBARRAY(dtype) (PyDataType_ISLEGACY(dtype) && ((_PyArray_LegacyDescr *)(dtype))->subarray != NULL)
16381692
#define PyDataType_ISUNSIZED(dtype) ((dtype)->elsize == 0 && \
16391693
!PyDataType_HASFIELDS(dtype))
16401694
#define PyDataType_MAKEUNSIZED(dtype) ((dtype)->elsize = 0)
@@ -1643,6 +1697,33 @@ PyArray_CLEARFLAGS(PyArrayObject *arr, int flags)
16431697
* npy_2_compat.h and are not defined here.
16441698
*/
16451699

1700+
/*
1701+
* Access inline functions for legacy fields. Except metadata these fields are
1702+
* specific to structured arrays (names, fields) or datetime (c_metadata).
1703+
* Although technically they may be used (but normally ignored) on non-struct
1704+
* dtypes as well.
1705+
* For structured dtypes, new ways to define and access fields make sense.
1706+
*/
1707+
static inline PyArray_ArrayDescr *
1708+
PyDataType_SUBARRAY(PyArray_Descr *dtype) {
1709+
return !PyDataType_ISLEGACY(dtype) ? NULL : ((_PyArray_LegacyDescr *)dtype)->subarray;
1710+
}
1711+
1712+
static inline PyObject *
1713+
PyDataType_NAMES(PyArray_Descr *dtype) {
1714+
return !PyDataType_ISLEGACY(dtype) ? NULL : ((_PyArray_LegacyDescr *)dtype)->names;
1715+
}
1716+
1717+
static inline PyObject *
1718+
PyDataType_FIELDS(PyArray_Descr *dtype) {
1719+
return !PyDataType_ISLEGACY(dtype) ? NULL : ((_PyArray_LegacyDescr *)dtype)->fields;
1720+
}
1721+
1722+
static inline NpyAuxData *
1723+
PyDataType_C_METADATA(PyArray_Descr *dtype) {
1724+
return !PyDataType_ISLEGACY(dtype) ? NULL : ((_PyArray_LegacyDescr *)dtype)->c_metadata;
1725+
}
1726+
16461727
#define PyArray_ISBOOL(obj) PyTypeNum_ISBOOL(PyArray_TYPE(obj))
16471728
#define PyArray_ISUNSIGNED(obj) PyTypeNum_ISUNSIGNED(PyArray_TYPE(obj))
16481729
#define PyArray_ISSIGNED(obj) PyTypeNum_ISSIGNED(PyArray_TYPE(obj))

numpy/_core/include/numpy/npy_1_7_deprecated_api.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,6 @@
6666
/* This way of accessing the default type is deprecated as of NumPy 1.7 */
6767
#define PyArray_DEFAULT NPY_DEFAULT_TYPE
6868

69-
/* These DATETIME bits aren't used internally */
70-
#define PyDataType_GetDatetimeMetaData(descr) \
71-
((descr->metadata == NULL) ? NULL : \
72-
((PyArray_DatetimeMetaData *)(PyCapsule_GetPointer( \
73-
PyDict_GetItemString( \
74-
descr->metadata, NPY_METADATA_DTSTR), NULL))))
75-
7669
/*
7770
* Deprecated as of NumPy 1.7, this kind of shortcut doesn't
7871
* belong in the public API.

numpy/_core/include/numpy/npy_2_compat.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,8 @@ PyArray_ImportNumPyAPI()
126126
/* Aliases of 2.x names to 1.x only equivalent names */
127127
#define NPY_NTYPES NPY_NTYPES_LEGACY
128128
#define PyArray_DescrProto PyArray_Descr
129+
/* NumPy 2 definition always works, but add it for 1.x only */
130+
#define PyDataType_ISLEGACY(dtype) (1)
129131
#else
130132
#define NPY_DEFAULT_INT \
131133
(PyArray_RUNTIME_VERSION >= NPY_2_0_API_VERSION ? NPY_INTP : NPY_LONG)

numpy/_core/src/multiarray/_multiarray_tests.c.src

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ static PyObject *
496496
create_custom_field_dtype(PyObject *NPY_UNUSED(mod), PyObject *args)
497497
{
498498
PyArray_DescrProto proto;
499-
PyArray_Descr *dtype;
499+
_PyArray_LegacyDescr *dtype; /* Is checked for void, so legacy is OK */
500500
PyTypeObject *scalar_type;
501501
int error_path;
502502

@@ -510,7 +510,7 @@ create_custom_field_dtype(PyObject *NPY_UNUSED(mod), PyObject *args)
510510
if (dtype->type_num != NPY_VOID || dtype->fields == NULL ||
511511
!PyDict_CheckExact(dtype->fields) ||
512512
PyTuple_Size(dtype->names) != 1 ||
513-
!PyDataType_REFCHK(dtype) ||
513+
!PyDataType_REFCHK((PyArray_Descr *)dtype) ||
514514
dtype->elsize != sizeof(PyObject *)) {
515515
PyErr_SetString(PyExc_ValueError,
516516
"Bad dtype passed to test function, must be an object "
@@ -531,7 +531,7 @@ create_custom_field_dtype(PyObject *NPY_UNUSED(mod), PyObject *args)
531531
proto.subarray = dtype->subarray;
532532
proto.fields = dtype->fields;
533533
proto.names = dtype->names;
534-
proto.f = PyDataType_GetArrFuncs(dtype);
534+
proto.f = PyDataType_GetArrFuncs((PyArray_Descr *)dtype);
535535
proto.metadata = dtype->metadata;
536536
proto.c_metadata = dtype->c_metadata;
537537

numpy/_core/src/multiarray/array_coercion.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1267,7 +1267,8 @@ PyArray_DiscoverDTypeAndShape(
12671267
flags |= DISCOVER_STRINGS_AS_SEQUENCES;
12681268
}
12691269
else if (requested_descr->type_num == NPY_VOID &&
1270-
(requested_descr->names || requested_descr->subarray)) {
1270+
(((_PyArray_LegacyDescr *)requested_descr)->names
1271+
|| ((_PyArray_LegacyDescr *)requested_descr)->subarray)) {
12711272
/* Void is a chimera, in that it may or may not be structured... */
12721273
flags |= DISCOVER_TUPLES_AS_ELEMENTS;
12731274
}

numpy/_core/src/multiarray/arrayobject.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -598,11 +598,9 @@ _void_compare(PyArrayObject *self, PyArrayObject *other, int cmp_op)
598598
return NULL;
599599
}
600600
if (PyArray_HASFIELDS(self) && PyArray_HASFIELDS(other)) {
601-
PyArray_Descr *self_descr = PyArray_DESCR(self);
602-
PyArray_Descr *other_descr = PyArray_DESCR(other);
603-
604601
/* Use promotion to decide whether the comparison is valid */
605-
PyArray_Descr *promoted = PyArray_PromoteTypes(self_descr, other_descr);
602+
PyArray_Descr *promoted = PyArray_PromoteTypes(
603+
PyArray_DESCR(self), PyArray_DESCR(other));
606604
if (promoted == NULL) {
607605
PyErr_SetString(PyExc_TypeError,
608606
"Cannot compare structured arrays unless they have a "
@@ -612,6 +610,9 @@ _void_compare(PyArrayObject *self, PyArrayObject *other, int cmp_op)
612610
}
613611
Py_DECREF(promoted);
614612

613+
_PyArray_LegacyDescr *self_descr = (_PyArray_LegacyDescr *)PyArray_DESCR(self);
614+
_PyArray_LegacyDescr *other_descr = (_PyArray_LegacyDescr *)PyArray_DESCR(other);
615+
615616
npy_intp result_ndim = PyArray_NDIM(self) > PyArray_NDIM(other) ?
616617
PyArray_NDIM(self) : PyArray_NDIM(other);
617618

0 commit comments

Comments
 (0)
0