bug #21523 #20411 fix Yaml parsing for very long quoted strings (Rich… · symfony/symfony@ab1d938 · GitHub
[go: up one dir, main page]

Skip to content

Commit ab1d938

Browse files
committed
bug #21523 #20411 fix Yaml parsing for very long quoted strings (RichardBradley)
This PR was squashed before being merged into the 2.7 branch (closes #21523).

Discussion
----------

#20411 fix Yaml parsing for very long quoted strings

| Q             | A
| ------------- | ---
| Branch?       | 2.7
| Bug fix?      | yes
| New feature?  | no
| BC breaks?    | no
| Deprecations? | no
| Tests pass?   | yes
| Fixed tickets | #20411
| License       | MIT
| Doc PR        | no

This is a second fix for the issue discussed in #20411. My first PR (#21279) didn't fix the bug in all cases, sorry.

If a YAML string has too many spaces in the value, it can trigger a `PREG_BACKTRACK_LIMIT_ERROR` error in the Yaml parser.

There should be no behavioural change other than the bug fix.

I have included a test which fails before this fix and passes after this fix.

I have also added checks that detect other PCRE internal errors and throw a more descriptive exception. Before this patch, the YAML engine would often give incorrect results, rather than throwing, on a PCRE `PREG_BACKTRACK_LIMIT_ERROR` error.

Commits
-------

c9a1c09 #20411 fix Yaml parsing for very long quoted strings
2 parents ac109f1 + c9a1c09 commit ab1d938

File tree

3 files changed

+77
-39
lines changed

3 files changed

+77
-39
lines changed

src/Symfony/Component/Yaml/Inline.php

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,8 @@ public static function dump($value, $exceptionOnInvalidType = false, $objectSupp
149149
case Escaper::requiresDoubleQuoting($value):
150150
return Escaper::escapeWithDoubleQuotes($value);
151151
case Escaper::requiresSingleQuoting($value):
152-
case preg_match(self::getHexRegex(), $value):
153-
case preg_match(self::getTimestampRegex(), $value):
152+
case Parser::preg_match(self::getHexRegex(), $value):
153+
case Parser::preg_match(self::getTimestampRegex(), $value):
154154
return Escaper::escapeWithSingleQuotes($value);
155155
default:
156156
return $value;
@@ -242,10 +242,10 @@ public static function parseScalar($scalar, $delimiters = null, $stringDelimiter
242242
$i += strlen($output);
243243

244244
// remove comments
245-
if (preg_match('/[ \t]+#/', $output, $match, PREG_OFFSET_CAPTURE)) {
245+
if (Parser::preg_match('/[ \t]+#/', $output, $match, PREG_OFFSET_CAPTURE)) {
246246
$output = substr($output, 0, $match[0][1]);
247247
}
248-
} elseif (preg_match('/^(.+?)('.implode('|', $delimiters).')/', substr($scalar, $i), $match)) {
248+
} elseif (Parser::preg_match('/^(.+?)('.implode('|', $delimiters).')/', substr($scalar, $i), $match)) {
249249
$output = $match[1];
250250
$i += strlen($output);
251251
} else {
@@ -272,7 +272,7 @@ public static function parseScalar($scalar, $delimiters = null, $stringDelimiter
272272
*/
273273
private static function parseQuotedScalar($scalar, &$i)
274274
{
275-
if (!preg_match('/'.self::REGEX_QUOTED_STRING.'/Au', substr($scalar, $i), $match)) {
275+
if (!Parser::preg_match('/'.self::REGEX_QUOTED_STRING.'/Au', substr($scalar, $i), $match)) {
276276
throw new ParseException(sprintf('Malformed inline YAML string: %s.', substr($scalar, $i)));
277277
}
278278

@@ -520,16 +520,16 @@ private static function evaluateScalar($scalar, $references = array())
520520

521521
return '0' == $scalar[1] ? octdec($scalar) : (((string) $raw === (string) $cast) ? $cast : $raw);
522522
case is_numeric($scalar):
523-
case preg_match(self::getHexRegex(), $scalar):
523+
case Parser::preg_match(self::getHexRegex(), $scalar):
524524
return '0x' === $scalar[0].$scalar[1] ? hexdec($scalar) : (float) $scalar;
525525
case '.inf' === $scalarLower:
526526
case '.nan' === $scalarLower:
527527
return -log(0);
528528
case '-.inf' === $scalarLower:
529529
return log(0);
530-
case preg_match('/^(-|\+)?[0-9,]+(\.[0-9]+)?$/', $scalar):
530+
case Parser::preg_match('/^(-|\+)?[0-9,]+(\.[0-9]+)?$/', $scalar):
531531
return (float) str_replace(',', '', $scalar);
532-
case preg_match(self::getTimestampRegex(), $scalar):
532+
case Parser::preg_match(self::getTimestampRegex(), $scalar):
533533
$timeZone = date_default_timezone_get();
534534
date_default_timezone_set('UTC');
535535
$time = strtotime($scalar);

src/Symfony/Component/Yaml/Parser.php

Lines changed: 57 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ public function __construct($offset = 0, $totalNumberOfLines = null, array $skip
6161
*/
6262
public function parse($value, $exceptionOnInvalidType = false, $objectSupport = false, $objectForMap = false)
6363
{
64-
if (!preg_match('//u', $value)) {
64+
if (false === preg_match('//u', $value)) {
6565
throw new ParseException('The YAML value does not appear to be valid UTF-8.');
6666
}
6767
$this->currentLineNb = -1;
@@ -92,13 +92,13 @@ public function parse($value, $exceptionOnInvalidType = false, $objectSupport =
9292
}
9393

9494
$isRef = $mergeNode = false;
95-
if (preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+?))?\s*$#u', $this->currentLine, $values)) {
95+
if (self::preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+))?$#u', rtrim($this->currentLine), $values)) {
9696
if ($context && 'mapping' == $context) {
9797
throw new ParseException('You cannot define a sequence item when in a mapping', $this->getRealCurrentLineNb() + 1, $this->currentLine);
9898
}
9999
$context = 'sequence';
100100

101-
if (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
101+
if (isset($values['value']) && self::preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
102102
$isRef = $matches['ref'];
103103
$values['value'] = $matches['value'];
104104
}
@@ -108,7 +108,7 @@ public function parse($value, $exceptionOnInvalidType = false, $objectSupport =
108108
$data[] = $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(null, true), $exceptionOnInvalidType, $objectSupport, $objectForMap);
109109
} else {
110110
if (isset($values['leadspaces'])
111-
&& preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $values['value'], $matches)
111+
&& self::preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+))?$#u', rtrim($values['value']), $matches)
112112
) {
113113
// this is a compact notation element, add to next block and parse
114114
$block = $values['value'];
@@ -124,7 +124,10 @@ public function parse($value, $exceptionOnInvalidType = false, $objectSupport =
124124
if ($isRef) {
125125
$this->refs[$isRef] = end($data);
126126
}
127-
} elseif (preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\[\{].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $this->currentLine, $values) && (false === strpos($values['key'], ' #') || in_array($values['key'][0], array('"', "'")))) {
127+
} elseif (
128+
self::preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\[\{].*?) *\:(\s+(?P<value>.+))?$#u', rtrim($this->currentLine), $values)
129+
&& (false === strpos($values['key'], ' #') || in_array($values['key'][0], array('"', "'")))
130+
) {
128131
if ($context && 'sequence' == $context) {
129132
throw new ParseException('You cannot define a mapping item when in a sequence', $this->currentLineNb + 1, $this->currentLine);
130133
}
@@ -191,7 +194,7 @@ public function parse($value, $exceptionOnInvalidType = false, $objectSupport =
191194
$data += $parsed; // array union
192195
}
193196
}
194-
} elseif (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
197+
} elseif (isset($values['value']) && self::preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
195198
$isRef = $matches['ref'];
196199
$values['value'] = $matches['value'];
197200
}
@@ -254,27 +257,7 @@ public function parse($value, $exceptionOnInvalidType = false, $objectSupport =
254257
return $value;
255258
}
256259

257-
switch (preg_last_error()) {
258-
case PREG_INTERNAL_ERROR:
259-
$error = 'Internal PCRE error.';
260-
break;
261-
case PREG_BACKTRACK_LIMIT_ERROR:
262-
$error = 'pcre.backtrack_limit reached.';
263-
break;
264-
case PREG_RECURSION_LIMIT_ERROR:
265-
$error = 'pcre.recursion_limit reached.';
266-
break;
267-
case PREG_BAD_UTF8_ERROR:
268-
$error = 'Malformed UTF-8 data.';
269-
break;
270-
case PREG_BAD_UTF8_OFFSET_ERROR:
271-
$error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.';
272-
break;
273-
default:
274-
$error = 'Unable to parse.';
275-
}
276-
277-
throw new ParseException($error, $this->getRealCurrentLineNb() + 1, $this->currentLine);
260+
throw new ParseException('Unable to parse', $this->getRealCurrentLineNb() + 1, $this->currentLine);
278261
}
279262
}
280263

@@ -508,7 +491,7 @@ private function parseValue($value, $exceptionOnInvalidType, $objectSupport, $ob
508491
return $this->refs[$value];
509492
}
510493

511-
if (preg_match('/^'.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) {
494+
if (self::preg_match('/^'.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) {
512495
$modifiers = isset($matches['modifiers']) ? $matches['modifiers'] : '';
513496

514497
return $this->parseBlockScalar($matches['separator'], preg_replace('#\d+#', '', $modifiers), (int) abs($modifiers));
@@ -554,7 +537,7 @@ private function parseBlockScalar($style, $chomping = '', $indentation = 0)
554537

555538
// determine indentation if not specified
556539
if (0 === $indentation) {
557-
if (preg_match('/^ +/', $this->currentLine, $matches)) {
540+
if (self::preg_match('/^ +/', $this->currentLine, $matches)) {
558541
$indentation = strlen($matches[0]);
559542
}
560543
}
@@ -565,7 +548,7 @@ private function parseBlockScalar($style, $chomping = '', $indentation = 0)
565548
while (
566549
$notEOF && (
567550
$isCurrentLineBlank ||
568-
preg_match($pattern, $this->currentLine, $matches)
551+
self::preg_match($pattern, $this->currentLine, $matches)
569552
)
570553
) {
571554
if ($isCurrentLineBlank && strlen($this->currentLine) > $indentation) {
@@ -788,6 +771,49 @@ private function isStringUnIndentedCollectionItem()
788771
*/
789772
private function isBlockScalarHeader()
790773
{
791-
return (bool) preg_match('~'.self::BLOCK_SCALAR_HEADER_PATTERN.'$~', $this->currentLine);
774+
return (bool) self::preg_match('~'.self::BLOCK_SCALAR_HEADER_PATTERN.'$~', $this->currentLine);
775+
}
776+
777+
/**
778+
* A local wrapper for `preg_match` which will throw a ParseException if there
779+
* is an internal error in the PCRE engine.
780+
*
781+
* This avoids us needing to check for "false" every time PCRE is used
782+
* in the YAML engine
783+
*
784+
* @throws ParseException on a PCRE internal error
785+
*
786+
* @see preg_last_error()
787+
*
788+
* @internal
789+
*/
790+
public static function preg_match($pattern, $subject, &$matches = null, $flags = 0, $offset = 0)
791+
{
792+
$ret = preg_match($pattern, $subject, $matches, $flags, $offset);
793+
if ($ret === false) {
794+
switch (preg_last_error()) {
795+
case PREG_INTERNAL_ERROR:
796+
$error = 'Internal PCRE error.';
797+
break;
798+
case PREG_BACKTRACK_LIMIT_ERROR:
799+
$error = 'pcre.backtrack_limit reached.';
800+
break;
801+
case PREG_RECURSION_LIMIT_ERROR:
802+
$error = 'pcre.recursion_limit reached.';
803+
break;
804+
case PREG_BAD_UTF8_ERROR:
805+
$error = 'Malformed UTF-8 data.';
806+
break;
807+
case PREG_BAD_UTF8_OFFSET_ERROR:
808+
$error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.';
809+
break;
810+
default:
811+
$error = 'Error.';
812+
}
813+
814+
throw new ParseException($error);
815+
}
816+
817+
return $ret;
792818
}
793819
}

src/Symfony/Component/Yaml/Tests/ParserTest.php

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
class ParserTest extends TestCase
1919
{
20+
/** @var Parser */
2021
protected $parser;
2122

2223
protected function setUp()
@@ -1146,6 +1147,17 @@ public function parserThrowsExceptionWithCorrectLineNumberProvider()
11461147
),
11471148
);
11481149
}
1150+
1151+
public function testCanParseVeryLongValue()
1152+
{
1153+
$longStringWithSpaces = str_repeat('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ', 20000);
1154+
$trickyVal = array('x' => $longStringWithSpaces);
1155+
1156+
$yamlString = Yaml::dump($trickyVal);
1157+
$arrayFromYaml = $this->parser->parse($yamlString);
1158+
1159+
$this->assertEquals($trickyVal, $arrayFromYaml);
1160+
}
11491161
}
11501162

11511163
class B

0 commit comments

Comments
 (0)
0