ENH: Support character string arrays

TST: added test for issue #18684 ENH: f2py opens files with correct encoding, fixes #635 TST: added test for issue #6308 TST: added test for issue #4519 TST: added test for issue #3425 ENH: Implement user-defined hooks support for post-processing f2py data structure. Implement character BC hook. ENH: Add support for detecting utf-16 and utf-32 encodings.
numpy · melissawm · Jun 6, 2022 · Jul 1, 2021 · Jun 5, 2022 · Jun 5, 2022
commit d4e11c7a2eb64861275facb076d47ccd135fa28c
diff --git a/doc/source/f2py/advanced.rst b/doc/source/f2py/advanced.rst
@@ -96,4 +96,54 @@ and the corresponding <C type>. The <C type> can be one of the following::
     complex_long_double
     string
 
 For more information, see the F2Py source code ``numpy/f2py/capi_maps.py``.
+For more information, see F2Py source code ``numpy/f2py/capi_maps.py``.
+
+.. _Character strings:
+
+Character strings
+=================
+
+Assumed length character strings
+-----------------------------------
+
+In Fortran, assumed length character string arguments are declared as
+``character*(*)`` or ``character(len=*)``, that is, the length of such
+arguments is determined by the actual string arguments at runtime.
+For ``intent(in)`` arguments, this lack of length information poses no
+problems for f2py to construct functional wrapper functions. However,
+for ``intent(out)`` arguments, the lack of length information is
+problematic for f2py generated wrappers because there is no size
+information available for creating memory buffers for such arguments
+and F2PY assumes the length is 0.  Depending on how the length of
+assumed length character strings is specified, there exist ways to
+work around this problem, as exemplified below.
+
+If the length of the ``character*(*)`` output argument is determined
+by the state of other input arguments, the required connection can be
+established in a signature file or within an f2py-comment by adding an
+extra declaration for the corresponding argument that specifies the
+length in the character selector part. For example, consider a Fortran
+file ``asterisk1.f90``:
+
+.. include:: asterisk1.f90
+  :literal:
+
+Compile it with ``f2py -c asterisk1.f90 -m asterisk1`` and then in Python:
+
+.. include:: asterisk1_session.dat
+  :literal:
+
+Notice that the extra declaration ``character(f2py_len=12) s`` is
+interpreted only by f2py and in the ``f2py_len=`` specification one
+can use C-expressions as a length value.
+
+In the following example:
+
+.. include:: asterisk2.f90
+  :literal:
+
+the length of the output assumed length string depends on an input
+argument ``n``. After wrapping with F2PY, in Python:
+
+.. include:: asterisk2_session.dat
+  :literal:
diff --git a/doc/source/f2py/asterisk1.f90 b/doc/source/f2py/asterisk1.f90
@@ -0,0 +1,5 @@
+subroutine foo1(s)
+  character*(*), intent(out) :: s
+  !f2py character(f2py_len=12) s
+  s = "123456789A12"
+end subroutine foo1
diff --git a/doc/source/f2py/asterisk1_session.dat b/doc/source/f2py/asterisk1_session.dat
@@ -0,0 +1,3 @@
+>>> import asterisk1
+>>> asterisk1.foo1()
+b'123456789A12'
diff --git a/doc/source/f2py/asterisk2.f90 b/doc/source/f2py/asterisk2.f90
@@ -0,0 +1,6 @@
+subroutine foo2(s, n)
+  character(len=*), intent(out) :: s
+  integer, intent(in) :: n
+  !f2py character(f2py_len=n), depend(n) :: s
+  s = "123456789A123456789B"(1:n)
+end subroutine foo2
diff --git a/doc/source/f2py/asterisk2_session.dat b/doc/source/f2py/asterisk2_session.dat
@@ -0,0 +1,6 @@
+>>> import asterisk
+>>> asterisk.foo2(2)
+b'12'
+>>> asterisk.foo2(12)
+b'123456789A12'
+>>> 
diff --git a/doc/source/f2py/signature-file.rst b/doc/source/f2py/signature-file.rst
@@ -645,21 +645,20 @@ A C expression may contain:
   according to given dependence relations;
 * the following CPP macros:
 
-  * ``rank(<name>)``
+  ``f2py_rank(<name>)``
     Returns the rank of an array ``<name>``.
-
-  * ``shape(<name>,<n>)``
+  ``f2py_shape(<name>, <n>)``
     Returns the ``<n>``-th dimension of an array ``<name>``.
-
-  * ``len(<name>)``
+  ``f2py_len(<name>)``
     Returns the length of an array ``<name>``.
-
-  * ``size(<name>)``
+  ``f2py_size(<name>)``
     Returns the size of an array ``<name>``.
-
-  * ``slen(<name>)``
+  ``f2py_itemsize(<name>)``
+    Returns the itemsize of an array ``<name>``.
+  ``f2py_slen(<name>)``
     Returns the length of a string ``<name>``.
 
+
 For initializing an array ``<array name>``, F2PY generates a loop over
 all indices and dimensions that executes the following
 pseudo-statement::
@@ -706,4 +705,15 @@ Currently, multi-line blocks can be used in the following constructs:
 
 * as a list of C arrays of the ``pymethoddef`` statement;
 
-* as a documentation string.
+* as a documentation string.
+
+Extended char-selector
+-----------------------
+
+F2PY extends the char-selector specification, usable within a signature
+file or an F2PY directive, as follows::
+
+  <extended-charselector> := <charselector>
+                          | (f2py_len= <len>)
+
+See :ref:`Character Strings` for usage.
diff --git a/numpy/f2py/auxfuncs.py b/numpy/f2py/auxfuncs.py
@@ -28,18 +28,21 @@
     'getfortranname', 'getpymethoddef', 'getrestdoc', 'getusercode',
     'getusercode1', 'hasbody', 'hascallstatement', 'hascommon',
     'hasexternals', 'hasinitvalue', 'hasnote', 'hasresultnote',
-    'isallocatable', 'isarray', 'isarrayofstrings', 'iscomplex',
+    'isallocatable', 'isarray', 'isarrayofstrings',
+    'ischaracter', 'ischaracterarray', 'ischaracter_or_characterarray',
+    'iscomplex',
     'iscomplexarray', 'iscomplexfunction', 'iscomplexfunction_warn',
     'isdouble', 'isdummyroutine', 'isexternal', 'isfunction',
-    'isfunction_wrap', 'isint1array', 'isinteger', 'isintent_aux',
+    'isfunction_wrap', 'isint1', 'isint1array', 'isinteger', 'isintent_aux',
     'isintent_c', 'isintent_callback', 'isintent_copy', 'isintent_dict',
     'isintent_hide', 'isintent_in', 'isintent_inout', 'isintent_inplace',
     'isintent_nothide', 'isintent_out', 'isintent_overwrite', 'islogical',
     'islogicalfunction', 'islong_complex', 'islong_double',
     'islong_doublefunction', 'islong_long', 'islong_longfunction',
     'ismodule', 'ismoduleroutine', 'isoptional', 'isprivate', 'isrequired',
     'isroutine', 'isscalar', 'issigned_long_longarray', 'isstring',
-    'isstringarray', 'isstringfunction', 'issubroutine',
+    'isstringarray', 'isstring_or_stringarray', 'isstringfunction',
+    'issubroutine',
     'issubroutine_wrap', 'isthreadsafe', 'isunsigned', 'isunsigned_char',
     'isunsigned_chararray', 'isunsigned_long_long',
     'isunsigned_long_longarray', 'isunsigned_short',
@@ -68,24 +71,41 @@ def debugcapi(var):
     return 'capi' in debugoptions
 
 
+def _ischaracter(var):
+    return 'typespec' in var and var['typespec'] == 'character' and \
+           not isexternal(var)
+
+
 def _isstring(var):
     return 'typespec' in var and var['typespec'] == 'character' and \
            not isexternal(var)
 
 
-def isstring(var):
-    return _isstring(var) and not isarray(var)
+def ischaracter_or_characterarray(var):
+    return _ischaracter(var) and 'charselector' not in var
 
 
 def ischaracter(var):
-    return isstring(var) and 'charselector' not in var
+    return ischaracter_or_characterarray(var) and not isarray(var)
+
+
+def ischaracterarray(var):
+    return ischaracter_or_characterarray(var) and isarray(var)
+
+
+def isstring_or_stringarray(var):
+    return _ischaracter(var) and 'charselector' in var
+
+
+def isstring(var):
+    return isstring_or_stringarray(var) and not isarray(var)
 
 
 def isstringarray(var):
-    return isarray(var) and _isstring(var)
+    return isstring_or_stringarray(var) and isarray(var)
 
 
-def isarrayofstrings(var):
+def isarrayofstrings(var):  # obsolete?
     # leaving out '*' for now so that `character*(*) a(m)` and `character
     # a(m,*)` are treated differently. Luckily `character**` is illegal.
     return isstringarray(var) and var['dimension'][-1] == '(*)'
@@ -126,6 +146,11 @@ def get_kind(var):
             pass
 
 
+def isint1(var):
+    return var.get('typespec') == 'integer' \
+        and get_kind(var) == '1' and not isarray(var)
+
+
 def islong_long(var):
     if not isscalar(var):
         return 0
@@ -426,6 +451,7 @@ def isintent_hide(var):
             ('out' in var['intent'] and 'in' not in var['intent'] and
                 (not l_or(isintent_inout, isintent_inplace)(var)))))
 
+
 def isintent_nothide(var):
     return not isintent_hide(var)
 
@@ -469,6 +495,7 @@ def isintent_aligned8(var):
 def isintent_aligned16(var):
     return 'aligned16' in var.get('intent', [])
 
+
 isintent_dict = {isintent_in: 'INTENT_IN', isintent_inout: 'INTENT_INOUT',
                  isintent_out: 'INTENT_OUT', isintent_hide: 'INTENT_HIDE',
                  isintent_cache: 'INTENT_CACHE',
@@ -566,19 +593,19 @@ def __call__(self, var):
 
 
 def l_and(*f):
-    l, l2 = 'lambda v', []
+    l1, l2 = 'lambda v', []
     for i in range(len(f)):
-        l = '%s,f%d=f[%d]' % (l, i, i)
+        l1 = '%s,f%d=f[%d]' % (l1, i, i)
         l2.append('f%d(v)' % (i))
-    return eval('%s:%s' % (l, ' and '.join(l2)))
+    return eval('%s:%s' % (l1, ' and '.join(l2)))
 
 
 def l_or(*f):
-    l, l2 = 'lambda v', []
+    l1, l2 = 'lambda v', []
     for i in range(len(f)):
-        l = '%s,f%d=f[%d]' % (l, i, i)
+        l1 = '%s,f%d=f[%d]' % (l1, i, i)
         l2.append('f%d(v)' % (i))
-    return eval('%s:%s' % (l, ' or '.join(l2)))
+    return eval('%s:%s' % (l1, ' or '.join(l2)))
 
 
 def l_not(f):
@@ -666,7 +693,9 @@ def getcallprotoargument(rout, cb_map={}):
                 pass
             else:
                 ctype = ctype + '*'
-            if isstring(var) or isarrayofstrings(var):
+            if ((isstring(var)
+                 or isarrayofstrings(var)  # obsolete?
+                 or isstringarray(var))):
                 arg_types2.append('size_t')
         arg_types.append(ctype)
 
@@ -731,14 +760,14 @@ def getrestdoc(rout):
 
 
 def gentitle(name):
-    l = (80 - len(name) - 6) // 2
-    return '/*%s %s %s*/' % (l * '*', name, l * '*')
+    ln = (80 - len(name) - 6) // 2
+    return '/*%s %s %s*/' % (ln * '*', name, ln * '*')
 
 
-def flatlist(l):
-    if isinstance(l, list):
-        return reduce(lambda x, y, f=flatlist: x + f(y), l, [])
-    return [l]
+def flatlist(lst):
+    if isinstance(lst, list):
+        return reduce(lambda x, y, f=flatlist: x + f(y), lst, [])
+    return [lst]
 
 
 def stripcomma(s):