8000 - · symfony/symfony@a7fa164 · GitHub
[go: up one dir, main page]

Skip to content

Commit a7fa164

Browse files
fancyweb authored and nicolas-grekas committed
-
1 parent 274edfc commit a7fa164

File tree

4 files changed

+83
-120
lines changed

4 files changed

+83
-120
lines changed

src/Symfony/Component/String/AbstractString.php

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -647,10 +647,8 @@ public function truncate(int $length, string $ellipsis = ''): self
647647
abstract public function upper(): self;
648648

649649
/**
650-
* Returns the string printable length on a terminal.
650+
* Returns the printable length on a terminal.
651651
*/
652-
abstract public function wcswidth(): int;
653-
654652
abstract public function width(bool $ignoreAnsiDecoration = true): int;
655653

656654
/**

src/Symfony/Component/String/AbstractUnicodeString.php

Lines changed: 79 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -425,91 +425,9 @@ public function upper(): parent
425425
return $str;
426426
}
427427

428-
/**
429-
* {@inheritdoc}
430-
*
431-
* If the string contains a non-printable character, -1 is returned.
432-
*
433-
* Based on https://github.com/jquast/wcwidth that is a Python implementation of https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c.
434-
*/
435-
public function wcswidth(): int
436-
{
437-
$width = 0;
438-
439-
$length = $this->length();
440-
for ($i = 0; $i <= $length; ++$i) {
441-
foreach ($this->codePointsAt($i) as $codePoint) {
442-
if (
443-
0 === $codePoint || // NULL
444-
0x034F === $codePoint || // COMBINING GRAPHEME JOINER
445-
(0x200B <= $codePoint && 0x200F >= $codePoint) || // ZERO WIDTH SPACE to RIGHT-TO-LEFT MARK
446-
0x2028 === $codePoint || // LINE SEPARATOR
447-
0x2029 === $codePoint || // PARAGRAPH SEPARATOR
448-
(0x202A <= $codePoint && 0x202E >= $codePoint) || // LEFT-TO-RIGHT EMBEDDING to RIGHT-TO-LEFT OVERRIDE
449-
(0x2060 <= $codePoint && 0x2063 >= $codePoint) // WORD JOINER to INVISIBLE SEPARATOR
450-
) {
451-
continue;
452-
}
453-
454-
if (
455-
32 > $codePoint || // C0 control characters
456-
(0x07F <= $codePoint && 0x0A0 > $codePoint) // C1 control characters and DEL
457-
) {
458-
return -1;
459-
}
460-
461-
static $tableZero;
462-
if (null === $tableZero) {
463-
$tableZero = require __DIR__.'/Resources/data/wcswidth_table_zero.php';
464-
}
465-
466-
if ($codePoint >= $tableZero[0][0] && $codePoint <= $tableZero[$ubound = \count($tableZero) - 1][1]) {
467-
$lbound = 0;
468-
while ($ubound >= $lbound) {
469-
$mid = floor(($lbound + $ubound) / 2);
470-
471-
if ($codePoint > $tableZero[$mid][1]) {
472-
$lbound = $mid + 1;
473-
} elseif ($codePoint < $tableZero[$mid][0]) {
474-
$ubound = $mid - 1;
475-
} else {
476-
continue 2;
477-
}
478-
}
479-
}
480-
481-
static $tableWide;
482-
if (null === $tableWide) {
483-
$tableWide = require __DIR__.'/Resources/data/wcswidth_table_wide.php';
484-
}
485-
486-
if ($codePoint >= $tableWide[0][0] && $codePoint <= $tableWide[$ubound = \count($tableWide) - 1][1]) {
487-
$lbound = 0;
488-
while ($ubound >= $lbound) {
489-
$mid = floor(($lbound + $ubound) / 2);
490-
491-
if ($codePoint > $tableWide[$mid][1]) {
492-
$lbound = $mid + 1;
493-
} elseif ($codePoint < $tableWide[$mid][0]) {
494-
$ubound = $mid - 1;
495-
} else {
496-
$width += 2;
497-
498-
continue 2;
499-
}
500-
}
501-
}
502-
503-
++$width;
504-
}
505-
}
506-
507-
return $width;
508-
}
509-
510428
public function width(bool $ignoreAnsiDecoration = true): int
511429
{
512-
$width = 0;
430+
$width = -1;
513431
$s = str_replace(["\x00", "\x05", "\x07"], '', $this->string);
514432

515433
if (false !== strpos($s, "\r")) {
@@ -525,11 +443,7 @@ public function width(bool $ignoreAnsiDecoration = true): int
525443
)/x', '', $s);
526444
}
527445

528-
$w = substr_count($s, "\xAD") - substr_count($s, "\x08");
529-
$s = preg_replace('/[\x00\x05\x07\p{Mn}\p{Me}\p{Cf}\x{1160}-\x{11FF}\x{200B}]+/u', '', $s);
530-
$s = preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $s, -1, $wide);
531-
532-
if ($width < $w += mb_strlen($s, 'UTF-8') + ($wide << 1)) {
446+
if ($width < $w = $this->wcwidth($s)) {
533447
$width = $w;
534448
}
535449
}
@@ -575,4 +489,81 @@ private function pad(int $len, self $pad, int $type): parent
575489
throw new InvalidArgumentException('Invalid padding type.');
576490
}
577491
}
492+
493+
/**
494+
* Based on https://github.com/jquast/wcwidth, a Python implementation of https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c.
495+
*
496+
* If the string contains a non-printable character, -1 is returned.
497+
*/
498+
private function wcwidth(string $string): int
499+
{
500+
$width = 0;
501+
502+
foreach (preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY) as $codePoint) {
503+
$codePoint = mb_ord($codePoint, 'UTF-8');
504+
505+
if (0 === $codePoint // NULL
506+
|| 0x034F === $codePoint // COMBINING GRAPHEME JOINER
507+
|| (0x200B <= $codePoint && 0x200F >= $codePoint) // ZERO WIDTH SPACE to RIGHT-TO-LEFT MARK
508+
|| 0x2028 === $codePoint // LINE SEPARATOR
509+
|| 0x2029 === $codePoint // PARAGRAPH SEPARATOR
510+
|| (0x202A <= $codePoint && 0x202E >= $codePoint) // LEFT-TO-RIGHT EMBEDDING to RIGHT-TO-LEFT OVERRIDE
511+
|| (0x2060 <= $codePoint && 0x2063 >= $codePoint) // WORD JOINER to INVISIBLE SEPARATOR
512+
) {
513+
continue;
514+
}
515+
516+
if (32 > $codePoint // C0 control characters
517+
|| (0x07F <= $codePoint && 0x0A0 > $codePoint) // C1 control characters and DEL
518+
) {
519+
return -1;
520+
}
521+
522+
static $tableZero;
523+
if (null === $tableZero) {
524+
$tableZero = require __DIR__.'/Resources/data/wcwidth_table_zero.php';
525+
}
526+
527+
if ($codePoint >= $tableZero[0][0] && $codePoint <= $tableZero[$ubound = \count($tableZero) - 1][1]) {
528+
$lbound = 0;
529+
while ($ubound >= $lbound) {
530+
$mid = floor(($lbound + $ubound) / 2);
531+
532+
if ($codePoint > $tableZero[$mid][1]) {
533+
$lbound = $mid + 1;
534+
} elseif ($codePoint < $tableZero[$mid][0]) {
535+
$ubound = $mid - 1;
536+
} else {
537+
continue 2;
538+
}
539+
}
540+
}
541+
542+
static $tableWide;
543+
if (null === $tableWide) {
544+
$tableWide = require __DIR__.'/Resources/data/wcwidth_table_wide.php';
545+
}
546+
547+
if ($codePoint >= $tableWide[0][0] && $codePoint <= $tableWide[$ubound = \count($tableWide) - 1][1]) {
548+
$lbound = 0;
549+
while ($ubound >= $lbound) {
550+
$mid = floor(($lbound + $ubound) / 2);
551+
552+
if ($codePoint > $tableWide[$mid][1]) {
553+
$lbound = $mid + 1;
554+
} elseif ($codePoint < $tableWide[$mid][0]) {
555+
$ubound = $mid - 1;
556+
} else {
557+
$width += 2;
558+
559+
continue 2;
560+
}
561+
}
562+
}
563+
564+
++$width;
565+
}
566+
567+
return $width;
568+
}
578569
}

src/Symfony/Component/String/ByteString.php

Lines changed: 2 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -461,36 +461,10 @@ public function upper(): parent
461461
/**
462462
* {@inheritdoc}
463463
*/
464-
public function wcswidth(): int
465-
{
466-
return $this->width();
467-
}
468-
469464
public function width(bool $ignoreAnsiDecoration = true): int
470465
{
471-
$width = 0;
472-
$s = str_replace(["\x00", "\x05", "\x07"], '', $this->string);
473-
474-
if (false !== strpos($s, "\r")) {
475-
$s = str_replace(["\r\n", "\r"], "\n", $s);
476-
}
477-
478-
foreach (explode("\n", $s) as $s) {
479-
if ($ignoreAnsiDecoration) {
480-
$s = preg_replace('/\x1B(?:
481-
\[ [\x30-\x3F]*+ [\x20-\x2F]*+ [0x40-\x7E]
482-
| [P\]X^_] .*? \x1B\\\\
483-
| [\x41-\x7E]
484-
)/x', '', $s);
485-
}
486-
487-
$w = substr_count($s, "\xAD") - substr_count($s, "\x08");
488-
489-
if ($width < $w += \strlen($s)) {
490-
$width = $w;
491-
}
492-
}
466+
$string = preg_match('//u', $this->string) ? $this->string : preg_replace('/[\x80-\xFF]/', '?', $this->string);
493467

494-
return $width;
468+
return (new CodePointString($string))->width($ignoreAnsiDecoration);
495469
}
496470
}

src/Symfony/Component/String/CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ CHANGELOG
55
-----
66

77
* Added the `AbstractString::reverse()` method.
8-
* Added the `AbstractString::wcswidth()` method.
8+
* Made `AbstractString::width()` follow POSIX.1-2001
99

1010
5.0.0
1111
-----

0 commit comments

Comments
 (0)
0