data-apis
diff --git a/protocol/dataframe_protocol.py b/protocol/dataframe_protocol.py
Lines changed: 14 additions & 14 deletions
diff --git a/protocol/pandas_implementation.py b/protocol/pandas_implementation.py
Lines changed: 12 additions & 1 deletion
@@ -12,14 +12,14 @@
 1. A `Buffer` class. A *buffer* is a contiguous block of memory - this is the
   only thing that actually maps to a 1-D array in a sense that it could be
   converted to NumPy, CuPy, et al.
-2. A `Column` class. A *column* has a name and a single dtype. It can consist
+2. A `Column` class. A *column* has a single dtype. It can consist
    of multiple *chunks*. A single chunk of a column (which may be the whole
    column if ``num_chunks == 1``) is modeled as again a `Column` instance, and
    contains 1 data *buffer* and (optionally) one *mask* for missing data.
-3. A `DataFrame` class. A *data frame* is an ordered collection of *columns*.
-   It has a single device, and all its rows are the same length. It can consist
-   of multiple *chunks*. A single chunk of a data frame is modeled as
-   again a `DataFrame` instance.
+3. A `DataFrame` class. A *data frame* is an ordered collection of *columns*,
+   which are identified with names that are unique strings.  All the data
+   frame's rows are the same length. It can consist of multiple *chunks*. A
+   single chunk of a data frame is modeled as again a `DataFrame` instance.
 4. A *mask* concept. A *mask* of a single-chunk column is a *buffer*.
 5. A *chunk* concept. A *chunk* is a sub-dividing element that can be applied
    to a *data frame* or a *column*.
@@ -59,7 +59,7 @@
 
 Note that row labels could be added in the future - right now there's no clear
 requirements for more complex row labels that cannot be represented by a single
-column. That do exist, for example Modin has has table and tree-based row
+column. These do exist, for example Modin has table and tree-based row
 labels.
 
 """
@@ -194,19 +194,19 @@ def offset(self) -> int:
         pass
 
     @property
-    def dtype(self) -> Tuple[int, int, str, str]:
+    def dtype(self) -> Tuple[enum.IntEnum, int, str, str]:
         """
         Dtype description as a tuple ``(kind, bit-width, format string, endianness)``
 
         Kind :
 
-            - 0 : signed integer
-            - 1 : unsigned integer
-            - 2 : IEEE floating point
-            - 20 : boolean
-            - 21 : string (UTF-8)
-            - 22 : datetime
-            - 23 : categorical
+            - INT = 0
+            - UINT = 1
+            - FLOAT = 2
+            - BOOL = 20
+            - STRING = 21   # UTF-8
+            - DATETIME = 22
+            - CATEGORICAL = 23
 
         Bit-width : the number of bits as an integer
         Format string : data type description format string in Apache Arrow C
 
@@ -68,6 +68,16 @@ def _from_dataframe(df : DataFrameObject) -> pd.DataFrame:
     return pd.DataFrame(columns)
 
 
+class _DtypeKind(enum.IntEnum):
+    INT = 0
+    UINT = 1
+    FLOAT = 2
+    BOOL = 20
+    STRING = 21   # UTF-8
+    DATETIME = 22
+    CATEGORICAL = 23
+
+
 def convert_column_to_ndarray(col : ColumnObject) -> np.ndarray:
     """
     """
@@ -82,7 +92,8 @@ def convert_column_to_ndarray(col : ColumnObject) -> np.ndarray:
     _dtype = col.dtype
     kind = _dtype[0]
     bitwidth = _dtype[1]
-    if _dtype[0] not in (0, 1, 2, 20):
+    _k = _DtypeKind
+    if _dtype[0] not in (_k.INT, _k.UINT, _k.FLOAT, _k.BOOL):
         raise RuntimeError("Not a boolean, integer or floating-point dtype")
 
     _ints = {8: np.int8, 16: np.int16, 32: np.int32, 64: np.int64}