[String] Made AbstractString::width() follow POSIX.1-2001 by fancyweb · Pull Request #35156 · symfony/symfony · GitHub
[go: up one dir, main page]

Skip to content

[String] Made AbstractString::width() follow POSIX.1-2001 #35156

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/Symfony/Component/String/.gitattributes
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
/Resources/bin/update-data.php export-ignore
/Resources/WcswidthDataGenerator.php export-ignore
/Tests export-ignore
/phpunit.xml.dist export-ignore
/.gitignore export-ignore
3 changes: 3 additions & 0 deletions src/Symfony/Component/String/AbstractString.php
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,9 @@ public function truncate(int $length, string $ellipsis = ''): self
*/
abstract public function upper(): self;

/**
 * Returns the printable length on a terminal.
 *
 * @param bool $ignoreAnsiDecoration When true (the default), implementations are expected to strip
 *                                   ANSI escape sequences before measuring, so that decoration
 *                                   does not count towards the width
 *
 * @return int The number of terminal columns; NOTE(review): for multi-line strings the handling
 *             of line breaks is implementation-defined - confirm against the concrete classes
 */
abstract public function width(bool $ignoreAnsiDecoration = true): int;

/**
Expand Down
96 changes: 84 additions & 12 deletions src/Symfony/Component/String/AbstractUnicodeString.php
Original file line number Diff line number Diff line change
Expand Up @@ -352,9 +352,6 @@ public function replaceMatches(string $fromRegexp, $to): parent
return $str;
}

/**
* {@inheritdoc}
*/
public function reverse(): parent
{
$str = clone $this;
Expand Down Expand Up @@ -444,22 +441,21 @@ public function width(bool $ignoreAnsiDecoration = true): int
$s = str_replace(["\r\n", "\r"], "\n", $s);
}

if (!$ignoreAnsiDecoration) {
$s = preg_replace('/[\p{Cc}\x7F]++/u', '', $s);
}

foreach (explode("\n", $s) as $s) {
if ($ignoreAnsiDecoration) {
$s = preg_replace('/\x1B(?:
$s = preg_replace('/(?:\x1B(?:
\[ [\x30-\x3F]*+ [\x20-\x2F]*+ [0x40-\x7E]
| [P\]X^_] .*? \x1B\\\\
| [\x41-\x7E]
)/x', '', $s);
)|[\p{Cc}\x7F]++)/xu', '', $s);
}

$w = substr_count($s, "\xAD") - substr_count($s, "\x08");
$s = preg_replace('/[\x00\x05\x07\p{Mn}\p{Me}\p{Cf}\x{1160}-\x{11FF}\x{200B}]+/u', '', $s);
$s = preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $s, -1, $wide);

if ($width < $w += mb_strlen($s, 'UTF-8') + ($wide << 1)) {
$width = $w;
}
// Non printable characters have been dropped, so wcswidth cannot logically return -1.
$width += $this->wcswidth($s);
}

return $width;
Expand Down Expand Up @@ -503,4 +499,80 @@ private function pad(int $len, self $pad, int $type): parent
throw new InvalidArgumentException('Invalid padding type.');
}
}

/**
 * Computes the number of terminal columns needed to display a string.
 *
 * Based on https://github.com/jquast/wcwidth, a Python implementation of https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c.
 *
 * Every code point contributes 0 columns (NULL, a few hard-coded invisible
 * formatting characters and everything listed in the "zero" table), 2 columns
 * (everything listed in the "wide" table) or 1 column (anything else).
 *
 * @param string $string The string to measure; it is split into code points with a /u regexp
 *
 * @return int The accumulated width, or -1 as soon as a C0/C1 control character or DEL is met
 */
private function wcswidth(string $string): int
{
    // The range tables are only loaded when a code point actually needs them,
    // then kept in static variables so later calls reuse them.
    static $tableZero, $tableWide;

    $width = 0;

    foreach (preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY) as $c) {
        $codePoint = mb_ord($c, 'UTF-8');

        if (0 === $codePoint // NULL
            || 0x034F === $codePoint // COMBINING GRAPHEME JOINER
            || (0x200B <= $codePoint && 0x200F >= $codePoint) // ZERO WIDTH SPACE to RIGHT-TO-LEFT MARK
            || 0x2028 === $codePoint // LINE SEPARATOR
            || 0x2029 === $codePoint // PARAGRAPH SEPARATOR
            || (0x202A <= $codePoint && 0x202E >= $codePoint) // LEFT-TO-RIGHT EMBEDDING to RIGHT-TO-LEFT OVERRIDE
            || (0x2060 <= $codePoint && 0x2063 >= $codePoint) // WORD JOINER to INVISIBLE SEPARATOR
        ) {
            continue;
        }

        // Non printable characters
        if (32 > $codePoint // C0 control characters
            || (0x07F <= $codePoint && 0x0A0 > $codePoint) // C1 control characters and DEL
        ) {
            return -1;
        }

        if (null === $tableZero) {
            $tableZero = require __DIR__.'/Resources/data/wcswidth_table_zero.php';
        }

        if (self::isCodePointInRanges($codePoint, $tableZero)) {
            continue;
        }

        if (null === $tableWide) {
            $tableWide = require __DIR__.'/Resources/data/wcswidth_table_wide.php';
        }

        $width += self::isCodePointInRanges($codePoint, $tableWide) ? 2 : 1;
    }

    return $width;
}

/**
 * Tells whether a code point belongs to one of the given ranges, using a binary search.
 *
 * @param int   $codePoint The code point to look up
 * @param array $ranges    A list of [first, last] inclusive pairs; the binary search requires
 *                         them to be sorted in ascending order and not to overlap
 */
private static function isCodePointInRanges(int $codePoint, array $ranges): bool
{
    $ubound = \count($ranges) - 1;

    // Cheap rejection of an empty table and of anything outside the overall span of the table.
    if (0 > $ubound || $codePoint < $ranges[0][0] || $codePoint > $ranges[$ubound][1]) {
        return false;
    }

    $lbound = 0;
    while ($ubound >= $lbound) {
        // Integer midpoint: floor() would return a float and turn the bounds into floats.
        $mid = ($lbound + $ubound) >> 1;

        if ($codePoint > $ranges[$mid][1]) {
            $lbound = $mid + 1;
        } elseif ($codePoint < $ranges[$mid][0]) {
            $ubound = $mid - 1;
        } else {
            return true;
        }
    }

    return false;
}
}
28 changes: 2 additions & 26 deletions src/Symfony/Component/String/ByteString.php
Original file line number Diff line number Diff line change
Expand Up @@ -303,9 +303,6 @@ public function replaceMatches(string $fromRegexp, $to): parent
return $str;
}

/**
* {@inheritdoc}
*/
public function reverse(): parent
{
$str = clone $this;
Expand Down Expand Up @@ -460,29 +457,8 @@ public function upper(): parent

public function width(bool $ignoreAnsiDecoration = true): int
{
$width = 0;
$s = str_replace(["\x00", "\x05", "\x07"], '', $this->string);
$string = preg_match('//u', $this->string) ? $this->string : preg_replace('/[\x80-\xFF]/', '?', $this->string);

if (false !== strpos($s, "\r")) {
$s = str_replace(["\r\n", "\r"], "\n", $s);
}

foreach (explode("\n", $s) as $s) {
if ($ignoreAnsiDecoration) {
$s = preg_replace('/\x1B(?:
\[ [\x30-\x3F]*+ [\x20-\x2F]*+ [0x40-\x7E]
| [P\]X^_] .*? \x1B\\\\
| [\x41-\x7E]
)/x', '', $s);
}

$w = substr_count($s, "\xAD") - substr_count($s, "\x08");

if ($width < $w += \strlen($s)) {
$width = $w;
}
}

return $width;
return (new CodePointString($string))->width($ignoreAnsiDecoration);
}
}
1 change: 1 addition & 0 deletions src/Symfony/Component/String/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ CHANGELOG
-----

* Added the `AbstractString::reverse()` method.
* Made `AbstractString::width()` follow POSIX.1-2001.

5.0.0
-----
Expand Down
113 changes: 113 additions & 0 deletions src/Symfony/Component/String/Resources/WcswidthDataGenerator.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace Symfony\Component\String\Resources;

use Symfony\Component\HttpClient\HttpClient;
use Symfony\Component\String\Exception\RuntimeException;
use Symfony\Component\VarExporter\VarExporter;

/**
 * Generates the "wide" and "zero width" code point range tables as PHP files.
 *
 * The data is extracted from the Unicode Character Database files served under
 * https://www.unicode.org/Public/UNIDATA/ and each generated file returns a
 * list of [first, last] code point pairs sorted by their first code point.
 *
 * @internal
 */
final class WcswidthDataGenerator
{
    /** @var string Directory the generated tables are written to */
    private $outDir;

    /** @var object HTTP client scoped to the Unicode data base URI */
    private $client;

    /**
     * @param string $outDir Directory that receives the generated files (used as-is, not created here)
     */
    public function __construct(string $outDir)
    {
        $this->outDir = $outDir;

        $this->client = HttpClient::createForBaseUri('https://www.unicode.org/Public/UNIDATA/');
    }

    /**
     * Downloads the Unicode data and (re)writes both tables.
     *
     * @throws RuntimeException When a source file cannot be parsed or a table cannot be written
     */
    public function generate(): void
    {
        $this->writeWideWidthData();

        $this->writeZeroWidthData();
    }

    /**
     * Builds wcswidth_table_wide.php from the ranges flagged W or F in EastAsianWidth.txt.
     */
    private function writeWideWidthData(): void
    {
        $content = $this->client->request('GET', 'EastAsianWidth.txt')->getContent();

        if (!preg_match('/^# EastAsianWidth-(\d+\.\d+\.\d+)\.txt/', $content, $matches)) {
            throw new RuntimeException('The Unicode version could not be determined.');
        }

        $version = $matches[1];

        // Code points are hexadecimal ([A-F\d]); optional blanks around the ";" separator are
        // tolerated so that both the compact and the column-aligned layouts are parsed.
        if (!preg_match_all('/^([A-F\d]{4,})(?:\.\.([A-F\d]{4,}))? *; *[WF]/m', $content, $matches, PREG_SET_ORDER)) {
            throw new RuntimeException('The wide width pattern did not match anything.');
        }

        $this->write('wcswidth_table_wide.php', $version, $matches);
    }

    /**
     * Builds wcswidth_table_zero.php from the Me and Mn general categories of DerivedGeneralCategory.txt.
     */
    private function writeZeroWidthData(): void
    {
        $content = $this->client->request('GET', 'extracted/DerivedGeneralCategory.txt')->getContent();

        if (!preg_match('/^# DerivedGeneralCategory-(\d+\.\d+\.\d+)\.txt/', $content, $matches)) {
            throw new RuntimeException('The Unicode version could not be determined.');
        }

        $version = $matches[1];

        if (!preg_match_all('/^([A-F\d]{4,})(?:\.\.([A-F\d]{4,}))? *; *(?:Me|Mn)/m', $content, $matches, PREG_SET_ORDER)) {
            throw new RuntimeException('The zero width pattern did not match anything.');
        }

        $this->write('wcswidth_table_zero.php', $version, $matches);
    }

    /**
     * Exports the formatted ranges to a PHP file returning them.
     *
     * @param array $rawData The PREG_SET_ORDER matches: index 1 is the first code point, index 2 the optional last one
     */
    private function write(string $fileName, string $version, array $rawData): void
    {
        $content = $this->getHeader($version).'return '.VarExporter::export($this->format($rawData)).";\n";

        // file_put_contents() signals a failure with false, not with a falsy byte count.
        if (false === file_put_contents($this->outDir.'/'.$fileName, $content)) {
            throw new RuntimeException(sprintf('The "%s" file could not be written.', $fileName));
        }
    }

    /**
     * Returns the opening of a generated file: PHP tag plus a comment carrying the Unicode version and the generation date.
     */
    private function getHeader(string $version): string
    {
        $date = (new \DateTimeImmutable())->format('c');

        return <<<EOT
<?php

/*
* This file has been auto-generated by the Symfony String Component for internal use.
*
* Unicode version: $version
* Date: $date
*/


EOT;
    }

    /**
     * Converts the regexp matches to integer [first, last] pairs sorted by first code point.
     */
    private function format(array $rawData): array
    {
        $data = array_map(static function (array $row): array {
            $start = $row[1];
            // A single code point line has no second capture: the range then ends where it starts.
            $end = $row[2] ?? $start;

            return [hexdec($start), hexdec($end)];
        }, $rawData);

        usort($data, static function (array $a, array $b): int {
            return $a[0] <=> $b[0];
        });

        return $data;
    }
}
55 changes: 55 additions & 0 deletions src/Symfony/Component/String/Resources/bin/update-data.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

use Symfony\Component\String\Resources\WcswidthDataGenerator;

error_reporting(E_ALL);

// Turn every PHP error, warning or notice into an exception so that nothing goes unnoticed.
set_error_handler(static function (int $severity, string $message, string $file, int $line): void {
    throw new \ErrorException($message, 0, $severity, $file, $line);
});

// Print an uncaught exception followed by its whole chain of previous exceptions.
set_exception_handler(static function (\Throwable $exception): void {
    echo "\n";

    for ($current = $exception; null !== $current; $current = $current->getPrevious()) {
        // Every exception but the outermost one is introduced as a cause.
        if ($current !== $exception) {
            echo "Caused by\n";
        }

        echo get_class($current).': '.$current->getMessage()."\n\n";
        echo $current->getFile().':'.$current->getLine()."\n";
        echo $current->getTraceAsString()."\n";
    }
});

$autoloadFile = __DIR__.'/../../vendor/autoload.php';

// The generator needs the dependencies installed in the component itself.
if (!file_exists($autoloadFile)) {
    $hint = wordwrap('You should run "composer install" in the component before running this script.', 75);

    echo $hint." Aborting.\n";

    exit(1);
}

require_once $autoloadFile;

echo "Generating wcswidth tables data...\n";

// Tables are written to the "data" directory that sits next to this script's "bin" directory.
$generator = new WcswidthDataGenerator(dirname(__DIR__).'/data');
$generator->generate();

echo "Done.\n";
Loading
0