8000 [String] Made AbstractString::width() follow POSIX.1-2001 · symfony/symfony@4967e13 · GitHub
[go: up one dir, main page]

Skip to content

Commit 4967e13

Browse files
[String] Made AbstractString::width() follow POSIX.1-2001
Co-authored-by: Nicolas Grekas <nicolas.grekas@gmail.com>
1 parent 07818f2 commit 4967e13

12 files changed

+2698
-38
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
/Resources/bin/update-data.php export-ignore
2+
/Resources/WcswidthDataGenerator.php export-ignore
13
/Tests export-ignore
24
/phpunit.xml.dist export-ignore
35
/.gitignore export-ignore

src/Symfony/Component/String/AbstractString.php

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -646,6 +646,9 @@ public function truncate(int $length, string $ellipsis = ''): self
646646
*/
647647
abstract public function upper(): self;
648648

649+
/**
650+
* Returns the printable length on a terminal.
651+
*/
649652
abstract public function width(bool $ignoreAnsiDecoration = true): int;
650653

651654
/**

src/Symfony/Component/String/AbstractUnicodeString.php

Lines changed: 78 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -352,9 +352,6 @@ public function replaceMatches(string $fromRegexp, $to): parent
352352
return $str;
353353
}
354354

355-
/**
356-
* {@inheritdoc}
357-
*/
358355
public function reverse(): parent
359356
{
360357
$str = clone $this;
@@ -444,22 +441,20 @@ public function width(bool $ignoreAnsiDecoration = true): int
444441
$s = str_replace(["\r\n", "\r"], "\n", $s);
445442
}
446443

444+
if (!$ignoreAnsiDecoration) {
445+
$s = preg_replace('/[\p{Cc}\x7F]++/u', '', $s);
446+
}
447+
447448
foreach (explode("\n", $s) as $s) {
448449
if ($ignoreAnsiDecoration) {
449-
$s = preg_replace('/\x1B(?:
450+
$s = preg_replace('/(?:\x1B(?:
450451
\[ [\x30-\x3F]*+ [\x20-\x2F]*+ [0x40-\x7E]
451452
| [P\]X^_] .*? \x1B\\\\
452453
| [\x41-\x7E]
453-
)/x', '', $s);
454+
)|[\p{Cc}\x7F]++)/xu', '', $s);
454455
}
455456

456-
$w = substr_count($s, "\xAD") - substr_count($s, "\x08");
457-
$s = preg_replace('/[\x00\x05\x07\p{Mn}\p{Me}\p{Cf}\x{1160}-\x{11FF}\x{200B}]+/u', '', $s);
458-
$s = preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $s, -1, $wide);
459-
460-
if ($width < $w += mb_strlen($s, 'UTF-8') + ($wide << 1)) {
461-
$width = $w;
462-
}
457+
$width += $this->wcswidth($s);
463458
}
464459

465460
return $width;
@@ -503,4 +498,75 @@ private function pad(int $len, self $pad, int $type): parent
503498
throw new InvalidArgumentException('Invalid padding type.');
504499
}
505500
}
501+
502+
/**
 * Computes the number of columns needed to display the given string.
 *
 * Based on https://github.com/jquast/wcwidth, a Python implementation of https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c.
 *
 * Contrary to the source implementation, non-printable characters (C0 and C1 control codes + DEL) do not return -1.
 * They must be dropped before calling this method.
 */
private function wcswidth(string $string): int
{
    // Range tables are loaded from disk on first use only and kept for subsequent calls.
    static $zeroRanges, $wideRanges;

    // Binary search for a code point in a list of [first, last] ranges
    // (assumes the list is sorted by ascending code point, as a binary search requires).
    $contains = static function (array $ranges, $codePoint): bool {
        $high = \count($ranges) - 1;

        // Cheap rejection of code points outside of the span covered by the table.
        if ($codePoint < $ranges[0][0] || $codePoint > $ranges[$high][1]) {
            return false;
        }

        $low = 0;
        while ($low <= $high) {
            $middle = ($low + $high) >> 1;

            if ($codePoint > $ranges[$middle][1]) {
                $low = $middle + 1;
            } elseif ($codePoint < $ranges[$middle][0]) {
                $high = $middle - 1;
            } else {
                return true;
            }
        }

        return false;
    };

    $columns = 0;

    foreach (preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY) as $char) {
        $cp = mb_ord($char, 'UTF-8');

        // Hard-coded characters that never occupy a column.
        $isInvisible = 0 === $cp                    // NULL
            || 0x034F === $cp                       // COMBINING GRAPHEME JOINER
            || ($cp >= 0x200B && $cp <= 0x200F)     // ZERO WIDTH SPACE to RIGHT-TO-LEFT MARK
            || ($cp >= 0x2028 && $cp <= 0x202E)     // LINE SEPARATOR, PARAGRAPH SEPARATOR, LEFT-TO-RIGHT EMBEDDING to RIGHT-TO-LEFT OVERRIDE
            || ($cp >= 0x2060 && $cp <= 0x2063);    // WORD JOINER to INVISIBLE SEPARATOR

        if ($isInvisible) {
            continue;
        }

        if (null === $zeroRanges) {
            $zeroRanges = require __DIR__.'/Resources/data/wcswidth_table_zero.php';
        }

        // Characters listed in the zero-width table do not add any column.
        if ($contains($zeroRanges, $cp)) {
            continue;
        }

        if (null === $wideRanges) {
            $wideRanges = require __DIR__.'/Resources/data/wcswidth_table_wide.php';
        }

        // Characters listed in the wide table take two columns, anything else takes one.
        $columns += $contains($wideRanges, $cp) ? 2 : 1;
    }

    return $columns;
}
506572
}

src/Symfony/Component/String/ByteString.php

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -303,9 +303,6 @@ public function replaceMatches(string $fromRegexp, $to): parent
303303
return $str;
304304
}
305305

306-
/**
307-
* {@inheritdoc}
308-
*/
309306
public function reverse(): parent
310307
{
311308
$str = clone $this;
@@ -460,29 +457,8 @@ public function upper(): parent
460457

461458
public function width(bool $ignoreAnsiDecoration = true): int
462459
{
463-
$width = 0;
464-
$s = str_replace(["\x00", "\x05", "\x07"], '', $this->string);
460+
$string = preg_match('//u', $this->string) ? $this->string : preg_replace('/[\x80-\xFF]/', '?', $this->string);
465461

466-
if (false !== strpos($s, "\r")) {
467-
$s = str_replace(["\r\n", "\r"], "\n", $s);
468-
}
469-
470-
foreach (explode("\n", $s) as $s) {
471-
if ($ignoreAnsiDecoration) {
472-
$s = preg_replace('/\x1B(?:
473-
\[ [\x30-\x3F]*+ [\x20-\x2F]*+ [0x40-\x7E]
474-
| [P\]X^_] .*? \x1B\\\\
475-
| [\x41-\x7E]
476-
)/x', '', $s);
477-
}
478-
479-
$w = substr_count($s, "\xAD") - substr_count($s, "\x08");
480-
481-
if ($width < $w += \strlen($s)) {
482-
$width = $w;
483-
}
484-
}
485-
486-
return $width;
462+
return (new CodePointString($string))->width($ignoreAnsiDecoration);
487463
}
488464
}

src/Symfony/Component/String/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ CHANGELOG
55
-----
66

77
* Added the `AbstractString::reverse()` method.
8+
* Made `AbstractString::width()` follow POSIX.1-2001.
89

910
5.0.0
1011
-----
Lines changed: 113 additions & 0 deletions
+
throw new RuntimeException(sprintf('The "%s" file could not be written.', $fileName));
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\Component\String\Resources;
13+
14+
use Symfony\Component\HttpClient\HttpClient;
15+
use Symfony\Component\String\Exception\RuntimeException;
16+
use Symfony\Component\VarExporter\VarExporter;
17+
18+
/**
19+
* @internal
20+
*/
21+
final class WcswidthDataGenerator
{
    /**
     * @var string Directory in which the generated table files are written
     */
    private $outDir;

    /**
     * HTTP client created by HttpClient::createForBaseUri() for the Unicode data directory.
     */
    private $client;

    /**
     * @param string $outDir Directory in which the generated table files are written
     */
    public function __construct(string $outDir)
    {
        $this->outDir = $outDir;

        $this->client = HttpClient::createForBaseUri('https://www.unicode.org/Public/UNIDATA/');
    }

    /**
     * Downloads the Unicode data files and regenerates both width tables.
     *
     * @throws RuntimeException When a data file cannot be parsed or a table cannot be written
     */
    public function generate(): void
    {
        $this->writeWideWidthData();

        $this->writeZeroWidthData();
    }

    /**
     * Builds "wcswidth_table_wide.php" from the ranges flagged W or F in EastAsianWidth.txt.
     */
    private function writeWideWidthData(): void
    {
        if (!preg_match('/^# EastAsianWidth-(\d+\.\d+\.\d+)\.txt/', $content = $this->client->request('GET', 'EastAsianWidth.txt')->getContent(), $matches)) {
            throw new RuntimeException('The Unicode version could not be determined.');
        }

        $version = $matches[1];

        // Code points are hexadecimal, hence [A-F\d]; the property value is a single W or F.
        // NOTE(review): newer revisions of the data file appear to pad the fields with spaces
        // around ";", so optional spaces are accepted on both sides - confirm against the current file.
        if (!preg_match_all('/^([A-F\d]{4,})(?:\.\.([A-F\d]{4,}))? *; *[WF]/m', $content, $matches, PREG_SET_ORDER)) {
            throw new RuntimeException('The wide width pattern did not match anything.');
        }

        $this->write('wcswidth_table_wide.php', $version, $matches);
    }

    /**
     * Builds "wcswidth_table_zero.php" from the ranges whose general category is Me or Mn
     * in extracted/DerivedGeneralCategory.txt.
     */
    private function writeZeroWidthData(): void
    {
        if (!preg_match('/^# DerivedGeneralCategory-(\d+\.\d+\.\d+)\.txt/', $content = $this->client->request('GET', 'extracted/DerivedGeneralCategory.txt')->getContent(), $matches)) {
            throw new RuntimeException('The Unicode version could not be determined.');
        }

        $version = $matches[1];

        // Code points are hexadecimal, hence [A-F\d].
        if (!preg_match_all('/^([A-F\d]{4,})(?:\.\.([A-F\d]{4,}))? *; (?:Me|Mn)/m', $content, $matches, PREG_SET_ORDER)) {
            throw new RuntimeException('The zero width pattern did not match anything.');
        }

        $this->write('wcswidth_table_zero.php', $version, $matches);
    }

    /**
     * Exports the formatted ranges as a PHP file returning an array.
     *
     * @param string $fileName Name of the file to create inside the output directory
     * @param string $version  Unicode version written in the file header
     * @param array  $rawData  Matches collected by preg_match_all() with PREG_SET_ORDER
     *
     * @throws RuntimeException When the file cannot be written
     */
    private function write(string $fileName, string $version, array $rawData): void
    {
        $content = $this->getHeader($version).'return '.VarExporter::export($this->format($rawData)).";\n";

        // file_put_contents() returns false on failure; compare strictly instead of relying on falsiness.
        if (false === file_put_contents($this->outDir.'/'.$fileName, $content)) {
            throw new RuntimeException(sprintf('The "%s" file could not be written.', $fileName));
        }
    }

    /**
     * Returns the PHP opening tag and the comment header put at the top of every generated file.
     */
    private function getHeader(string $version): string
    {
        $date = (new \DateTimeImmutable())->format('c');

        return <<<EOT
<?php

/*
 * This file has been auto-generated by the Symfony String Component for internal use.
 *
 * Unicode version: $version
 * Date: $date
 */


EOT;
    }

    /**
     * Converts the regex matches into [first, last] pairs of integer code points,
     * sorted by ascending first code point.
     *
     * @param array $rawData Matches whose index 1 is the first code point (hexadecimal)
     *                       and whose optional index 2 is the last one
     */
    private function format(array $rawData): array
    {
        $data = array_map(static function (array $row): array {
            $start = $row[1];
            // A line describing a single code point has no second capture group.
            $end = $row[2] ?? $start;

            return [hexdec($start), hexdec($end)];
        }, $rawData);

        usort($data, static function (array $a, array $b): int {
            return $a[0] <=> $b[0];
        });

        return $data;
    }
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\Component\String\Resources\WcswidthDataGenerator;
13+
14+
// Report everything: any problem must abort the generation.
error_reporting(E_ALL);

// Turn every PHP error into an exception so that it reaches the handler below.
set_error_handler(static function (int $severity, string $message, string $file, int $line): void {
    throw new \ErrorException($message, 0, $severity, $file, $line);
});

// Print the uncaught exception followed by the whole chain of previous exceptions.
set_exception_handler(static function (\Throwable $exception): void {
    echo "\n";

    $isRoot = true;

    for ($current = $exception; null !== $current; $current = $current->getPrevious()) {
        if (!$isRoot) {
            echo "Caused by\n";
        }
        $isRoot = false;

        echo get_class($current).': '.$current->getMessage()."\n";
        echo "\n";
        echo $current->getFile().':'.$current->getLine()."\n";
        echo $current->getTraceAsString()."\n";
    }
});

$autoload = __DIR__.'/../../vendor/autoload.php';

// The generator needs the dependencies installed next to the component.
if (!file_exists($autoload)) {
    echo wordwrap('You should run "composer install" in the component before running this script.', 75)." Aborting.\n";

    exit(1);
}

require_once $autoload;

echo "Generating wcswidth tables data...\n";

$generator = new WcswidthDataGenerator(dirname(__DIR__).'/data');
$generator->generate();

echo "Done.\n";

0 commit comments

Comments
 (0)
0