bpo-33954: Fix _PyUnicode_InsertThousandsGrouping() (GH-10623) (GH-10718) · python/cpython@6f5fa1b · GitHub
[go: up one dir, main page]

Skip to content

Commit 6f5fa1b

Browse files
authored
bpo-33954: Fix _PyUnicode_InsertThousandsGrouping() (GH-10623) (GH-10718)
Fix str.format(), float.__format__() and complex.__format__() methods for non-ASCII decimal point when using the "n" formatter. Rewrite _PyUnicode_InsertThousandsGrouping(): it now requires a _PyUnicodeWriter object for the buffer and a Python str object for digits. (cherry picked from commit 59423e3)
1 parent e88553c commit 6f5fa1b

File tree

5 files changed

+215
-246
lines changed

5 files changed

+215
-246
lines changed

Include/unicodeobject.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2135,10 +2135,10 @@ PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
21352135
see Objects/stringlib/localeutil.h */
21362136
#ifndef Py_LIMITED_API
21372137
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
2138-
PyObject *unicode,
2139-
Py_ssize_t index,
2138+
_PyUnicodeWriter *writer,
21402139
Py_ssize_t n_buffer,
2141-
void *digits,
2140+
PyObject *digits,
2141+
Py_ssize_t d_pos,
21422142
Py_ssize_t n_digits,
21432143
Py_ssize_t min_width,
21442144
const char *grouping,
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix :meth:`str.format`, :meth:`float.__format__` and
2+
:meth:`complex.__format__` methods for non-ASCII decimal point when using
3+
the "n" formatter.

Objects/stringlib/localeutil.h

Lines changed: 35 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,24 @@
1-
/* stringlib: locale related helpers implementation */
2-
3-
#include <locale.h>
4-
5-
#if !STRINGLIB_IS_UNICODE
6-
# error "localeutil.h is specific to Unicode"
7-
#endif
1+
/* _PyUnicode_InsertThousandsGrouping() helper functions */
82

93
typedef struct {
104
const char *grouping;
115
char previous;
126
Py_ssize_t i; /* Where we're currently pointing in grouping. */
13-
} STRINGLIB(GroupGenerator);
7+
} GroupGenerator;
8+
149

1510
static void
16-
STRINGLIB(GroupGenerator_init)(STRINGLIB(GroupGenerator) *self, const char *grouping)
11+
GroupGenerator_init(GroupGenerator *self, const char *grouping)
1712
{
1813
self->grouping = grouping;
1914
self->i = 0;
2015
self->previous = 0;
2116
}
2217

18+
2319
/* Returns the next grouping, or 0 to signify end. */
2420
static Py_ssize_t
25-
STRINGLIB(GroupGenerator_next)(STRINGLIB(GroupGenerator) *self)
21+
GroupGenerator_next(GroupGenerator *self)
2622
{
2723
/* Note that we don't really do much error checking here. If a
2824
grouping string contains just CHAR_MAX, for example, then just
@@ -43,138 +39,44 @@ STRINGLIB(GroupGenerator_next)(STRINGLIB(GroupGenerator) *self)
4339
}
4440
}
4541

42+
4643
/* Fill in some digits, leading zeros, and thousands separator. All
4744
are optional, depending on when we're called. */
4845
static void
49-
STRINGLIB(fill)(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end,
50-
Py_ssize_t n_chars, Py_ssize_t n_zeros, STRINGLIB_CHAR* thousands_sep,
51-
Py_ssize_t thousands_sep_len)
46+
InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos,
47+
PyObject *digits, Py_ssize_t *digits_pos,
48+
Py_ssize_t n_chars, Py_ssize_t n_zeros,
49+
PyObject *thousands_sep, Py_ssize_t thousands_sep_len,
50+
Py_UCS4 *maxchar)
5251
{
53-
Py_ssize_t i;
52+
if (!writer) {
53+
/* if maxchar > 127, maxchar is already set */
54+
if (*maxchar == 127 && thousands_sep) {
55+
Py_UCS4 maxchar2 = PyUnicode_MAX_CHAR_VALUE(thousands_sep);
56+
*maxchar = Py_MAX(*maxchar, maxchar2);
57+
}
58+
return;
59+
}
5460

5561
if (thousands_sep) {
56-
*buffer_end -= thousands_sep_len;
62+
*buffer_pos -= thousands_sep_len;
5763

5864
/* Copy the thousands_sep chars into the buffer. */
59-
memcpy(*buffer_end, thousands_sep,
60-
thousands_sep_len * STRINGLIB_SIZEOF_CHAR);
61-
}
62-
63-
*buffer_end -= n_chars;
64-
*digits_end -= n_chars;
65-
memcpy(*buffer_end, *digits_end, n_chars * sizeof(STRINGLIB_CHAR));
66-
67-
*buffer_end -= n_zeros;
68-
for (i = 0; i < n_zeros; i++)
69-
(*buffer_end)[i] = '0';
70-
}
71-
72-
/**
73-
* InsertThousandsGrouping:
74-
* @buffer: A pointer to the start of a string.
75-
* @n_buffer: Number of characters in @buffer.
76-
* @digits: A pointer to the digits we're reading from. If count
77-
* is non-NULL, this is unused.
78-
* @n_digits: The number of digits in the string, in which we want
79-
* to put the grouping chars.
80-
* @min_width: The minimum width of the digits in the output string.
81-
* Output will be zero-padded on the left to fill.
82-
* @grouping: see definition in localeconv().
83-
* @thousands_sep: see definition in localeconv().
84-
*
85-
* There are 2 modes: counting and filling. If @buffer is NULL,
86-
* we are in counting mode, else filling mode.
87-
* If counting, the required buffer size is returned.
88-
* If filling, we know the buffer will be large enough, so we don't
89-
* need to pass in the buffer size.
90-
* Inserts thousand grouping characters (as defined by grouping and
91-
* thousands_sep) into the string between buffer and buffer+n_digits.
92-
*
93-
* Return value: 0 on error, else 1. Note that no error can occur if
94-
* count is non-NULL.
95-
*
96-
* This name won't be used, the includer of this file should define
97-
* it to be the actual function name, based on unicode or string.
98-
*
99-
* As closely as possible, this code mimics the logic in decimal.py's
100-
_insert_thousands_sep().
101-
**/
102-
static Py_ssize_t
103-
STRINGLIB(InsertThousandsGrouping)(
104-
STRINGLIB_CHAR *buffer,
105-
Py_ssize_t n_buffer,
106-
STRINGLIB_CHAR *digits,
107-
Py_ssize_t n_digits,
108-
Py_ssize_t min_width,
109-
const char *grouping,
110-
STRINGLIB_CHAR *thousands_sep,
111-
Py_ssize_t thousands_sep_len)
112-
{
113-
Py_ssize_t count = 0;
114-
Py_ssize_t n_zeros;
115-
int loop_broken = 0;
116-
int use_separator = 0; /* First time through, don't append the
117-
separator. They only go between
118-
groups. */
119-
STRINGLIB_CHAR *buffer_end = NULL;
120-
STRINGLIB_CHAR *digits_end = NULL;
121-
Py_ssize_t l;
122-
Py_ssize_t n_chars;
123-
Py_ssize_t remaining = n_digits; /* Number of chars remaining to
124-
be looked at */
125-
/* A generator that returns all of the grouping widths, until it
126-
returns 0. */
127-
STRINGLIB(GroupGenerator) groupgen;
128-
STRINGLIB(GroupGenerator_init)(&groupgen, grouping);
129-
130-
if (buffer) {
131-
buffer_end = buffer + n_buffer;
132-
digits_end = digits + n_digits;
133-
}
134-
135-
while ((l = STRINGLIB(GroupGenerator_next)(&groupgen)) > 0) {
136-
l = Py_MIN(l, Py_MAX(Py_MAX(remaining, min_width), 1));
137-
n_zeros = Py_MAX(0, l - remaining);
138-
n_chars = Py_MAX(0, Py_MIN(remaining, l));
139-
140-
/* Use n_zero zero's and n_chars chars */
141-
142-
/* Count only, don't do anything. */
143-
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
144-
145-
if (buffer) {
146-
/* Copy into the output buffer. */
147-
STRINGLIB(fill)(&digits_end, &buffer_end, n_chars, n_zeros,
148-
use_separator ? thousands_sep : NULL, thousands_sep_len);
149-
}
150-
151-
/* Use a separator next time. */
152-
use_separator = 1;
153-
154-
remaining -= n_chars;
155-
min_width -= l;
156-
157-
if (remaining <= 0 && min_width <= 0) {
158-
loop_broken = 1;
159-
break;
160-
}
161-
min_width -= thousands_sep_len;
65+
_PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
66+
thousands_sep, 0,
67+
thousands_sep_len);
16268
}
163-
if (!loop_broken) {
164-
/* We left the loop without using a break statement. */
16569

166-
l = Py_MAX(Py_MAX(remaining, min_width), 1);
167-
n_zeros = Py_MAX(0, l - remaining);
168-
n_chars = Py_MAX(0, Py_MIN(remaining, l));
169-
170-
/* Use n_zero zero's and n_chars chars */
171-
count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
172-
if (buffer) {
173-
/* Copy into the output buffer. */
174-
STRINGLIB(fill)(&digits_end, &buffer_end, n_chars, n_zeros,
175-
use_separator ? thousands_sep : NULL, thousands_sep_len);
176-
}
70+
*buffer_pos -= n_chars;
71+
*digits_pos -= n_chars;
72+
_PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
73+
digits, *digits_pos,
74+
n_chars);
75+
76+
if (n_zeros) {
77+
*buffer_pos -= n_zeros;
78+
enum PyUnicode_Kind kind = PyUnicode_KIND(writer->buffer);
79+
void *data = PyUnicode_DATA(writer->buffer);
80+
FILL(kind, data, '0', *buffer_pos, n_zeros);
17781
}
178-
return count;
17982
}
180-

0 commit comments

Comments
 (0)
0