@@ -1149,49 +1149,54 @@ string_lrstrip_whitespace(Buffer<enc> buf, Buffer<enc> out, STRIPTYPE striptype)
1149
1149
return 0 ;
1150
1150
}
1151
1151
1152
- size_t i = 0 ;
1152
+ size_t new_start = 0 ;
1153
1153
1154
1154
size_t num_bytes = (buf.after - buf.buf );
1155
1155
Buffer traverse_buf = Buffer<enc>(buf.buf , num_bytes);
1156
1156
1157
1157
if (striptype != STRIPTYPE::RIGHTSTRIP) {
1158
- while (i < len) {
1158
+ while (new_start < len) {
1159
1159
if (!traverse_buf.first_character_isspace ()) {
1160
1160
break ;
1161
1161
}
1162
1162
num_bytes -= traverse_buf.num_bytes_next_character ();
1163
- traverse_buf ++;
1164
- i ++;
1163
+ new_start ++;
1164
+ traverse_buf ++; // may go one beyond buffer
1165
1165
}
1166
1166
}
1167
1167
1168
- npy_intp j = len - 1 ; // Could also turn negative if we're stripping the whole string
1168
+ size_t new_stop = len; // New stop is a range (beyond last char)
1169
1169
if (enc == ENCODING::UTF8) {
1170
1170
traverse_buf = Buffer<enc>(buf.after , 0 ) - 1 ;
1171
1171
}
1172
1172
else {
1173
- traverse_buf = buf + j ;
1173
+ traverse_buf = buf + (new_stop - 1 ) ;
1174
1174
}
1175
1175
1176
1176
if (striptype != STRIPTYPE::LEFTSTRIP) {
1177
- while (j >= static_cast <npy_intp>(i) ) {
1177
+ while (new_stop > new_start ) {
1178
1178
if (*traverse_buf != 0 && !traverse_buf.first_character_isspace ()) {
1179
1179
break ;
1180
1180
}
1181
+
1181
1182
num_bytes -= traverse_buf.num_bytes_next_character ();
1182
- traverse_buf--;
1183
- j--;
1183
+ new_stop--;
1184
+
1185
+ // Do not step to character -1: can't find its start for utf-8.
1186
+ if (new_stop > 0 ) {
1187
+ traverse_buf--;
1188
+ }
1184
1189
}
1185
1190
}
1186
1191
1187
- Buffer offset_buf = buf + i ;
1192
+ Buffer offset_buf = buf + new_start ;
1188
1193
if (enc == ENCODING::UTF8) {
1189
1194
offset_buf.buffer_memcpy (out, num_bytes);
1190
1195
return num_bytes;
1191
1196
}
1192
- offset_buf.buffer_memcpy (out, j - i + 1 );
1193
- out.buffer_fill_with_zeros_after_index (j - i + 1 );
1194
- return j - i + 1 ;
1197
+ offset_buf.buffer_memcpy (out, new_stop - new_start );
1198
+ out.buffer_fill_with_zeros_after_index (new_stop - new_start );
1199
+ return new_stop - new_start ;
1195
1200
}
1196
1201
1197
1202
@@ -1218,13 +1223,13 @@ string_lrstrip_chars(Buffer<enc> buf1, Buffer<enc> buf2, Buffer<enc> out, STRIPT
1218
1223
return len1;
1219
1224
}
1220
1225
1221
- size_t i = 0 ;
1226
+ size_t new_start = 0 ;
1222
1227
1223
1228
size_t num_bytes = (buf1.after - buf1.buf );
1224
1229
Buffer traverse_buf = Buffer<enc>(buf1.buf , num_bytes);
1225
1230
1226
1231
if (striptype != STRIPTYPE::RIGHTSTRIP) {
1227
- while (i < len1) {
1232
+ for (; new_start < len1; traverse_buf++ ) {
1228
1233
Py_ssize_t res;
1229
1234
switch (enc) {
1230
1235
case ENCODING::ASCII:
@@ -1245,21 +1250,20 @@ string_lrstrip_chars(Buffer<enc> buf1, Buffer<enc> buf2, Buffer<enc> out, STRIPT
1245
1250
break ;
1246
1251
}
1247
1252
num_bytes -= traverse_buf.num_bytes_next_character ();
1248
- traverse_buf++;
1249
- i++;
1253
+ new_start++;
1250
1254
}
1251
1255
}
1252
1256
1253
- npy_intp j = len1 - 1 ;
1257
+ size_t new_stop = len1; // New stop is a range (beyond last char)
1254
1258
if (enc == ENCODING::UTF8) {
1255
1259
traverse_buf = Buffer<enc>(buf1.after , 0 ) - 1 ;
1256
1260
}
1257
1261
else {
1258
- traverse_buf = buf1 + j ;
1262
+ traverse_buf = buf1 + (new_stop - 1 ) ;
1259
1263
}
1260
1264
1261
1265
if (striptype != STRIPTYPE::LEFTSTRIP) {
1262
- while (j >= static_cast <npy_intp>(i) ) {
1266
+ while (new_stop > new_start ) {
1263
1267
Py_ssize_t res;
1264
1268
switch (enc) {
1265
1269
case ENCODING::ASCII:
@@ -1280,19 +1284,22 @@ string_lrstrip_chars(Buffer<enc> buf1, Buffer<enc> buf2, Buffer<enc> out, STRIPT
1280
1284
break ;
1281
1285
}
1282
1286
num_bytes -= traverse_buf.num_bytes_next_character ();
1283
- j--;
1284
- traverse_buf--;
1287
+ new_stop--;
1288
// Do not step to character -1: can't find its start for utf-8.
1289
+ if (new_stop > 0 ) {
1290
+ traverse_buf--;
1291
+ }
1285
1292
}
1286
1293
}
1287
1294
1288
- Buffer offset_buf = buf1 + i ;
1295
+ Buffer offset_buf = buf1 + new_start ;
1289
1296
if (enc == ENCODING::UTF8) {
1290
1297
offset_buf.buffer_memcpy (out, num_bytes);
1291
1298
return num_bytes;
1292
1299
}
1293
- offset_buf.buffer_memcpy (out, j - i + 1 );
1294
- out.buffer_fill_with_zeros_after_index (j - i + 1 );
1295
- return j - i + 1 ;
1300
+ offset_buf.buffer_memcpy (out, new_stop - new_start );
1301
+ out.buffer_fill_with_zeros_after_index (new_stop - new_start );
1302
+ return new_stop - new_start ;
1296
1303
}
1297
1304
1298
1305
template <typename char_type>
0 commit comments