10000 [YAML] Improve performance of YAML parser · symfony/symfony@7a7c966 · GitHub
[go: up one dir, main page]

Skip to content

Commit 7a7c966

Browse files
NamelessCoder authored and nicolas-grekas committed
[YAML] Improve performance of YAML parser
Optimise various methods and conditions to use the best-performing alternatives where possible. Roughly:
* Uses functions that do not copy memory, e.g. strncmp() instead of strpos() when matching the beginning of a string.
* Reorders some conditions so that the cheapest checks come first.
* Checks the input before calling trim(): even when the function returns the same string as its input, the call still costs memory and adds unnecessary overhead.
* Extracts variables for repeated identical function calls.
* Uses negative substr() offsets instead of strlen() + substr().
* Replaces single-character substr() calls with string offset access (e.g. $line[-1]).
1 parent ecf37dd commit 7a7c966

File tree

2 files changed

+44
-34
lines changed

2 files changed

+44
-34
lines changed

src/Symfony/Component/Yaml/Inline.php

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ private static function dumpNull(int $flags): string
269269
*/
270270
public static function parseScalar(string $scalar, int $flags = 0, array $delimiters = null, int &$i = 0, bool $evaluate = true, array $references = [])
271271
{
272-
if (\in_array($scalar[$i], ['"', "'"])) {
272+
if (\in_array($scalar[$i], ['"', "'"], true)) {
273273
// quoted scalar
274274
$output = self::parseQuotedScalar($scalar, $i);
275275

@@ -324,7 +324,7 @@ private static function parseQuotedScalar(string $scalar, int &$i): string
324324
throw new ParseException(sprintf('Malformed inline YAML string: %s.', substr($scalar, $i)), self::$parsedLineNumber + 1, $scalar, self::$parsedFilename);
325325
}
326326

327-
$output = substr($match[0], 1, \strlen($match[0]) - 2);
327+
$output = substr($match[0], 1, -1);
328328

329329
$unescaper = new Unescaper();
330330
if ('"' == $scalar[$i]) {
@@ -371,7 +371,7 @@ private static function parseSequence(string $sequence, int $flags, int &$i = 0,
371371
$value = self::parseMapping($sequence, $flags, $i, $references);
372372
break;
373373
default:
374-
$isQuoted = \in_array($sequence[$i], ['"', "'"]);
374+
$isQuoted = \in_array($sequence[$i], ['"', "'"], true);
375375
$value = self::parseScalar($sequence, $flags, [',', ']'], $i, null === $tag, $references);
376376

377377
// the value can be an array if a reference has been resolved to an array var
@@ -551,9 +551,8 @@ private static function parseMapping(string $mapping, int $flags, int &$i = 0, a
551551
private static function evaluateScalar(string $scalar, int $flags, array $references = [])
552552
{
553553
$scalar = trim($scalar);
554-
$scalarLower = strtolower($scalar);
555554

556-
if (0 === strpos($scalar, '*')) {
555+
if ('*' === ($scalar[0] ?? '')) {
557556
if (false !== $pos = strpos($scalar, '#')) {
558557
$value = substr($scalar, 1, $pos - 2);
559558
} else {
@@ -572,6 +571,8 @@ private static function evaluateScalar(string $scalar, int $flags, array $refere
572571
return $references[$value];
573572
}
574573

574+
$scalarLower = strtolower($scalar);
575+
575576
switch (true) {
576577
case 'null' === $scalarLower:
577578
case '' === $scalar:
@@ -583,11 +584,11 @@ private static function evaluateScalar(string $scalar, int $flags, array $refere
583584
return false;
584585
case '!' === $scalar[0]:
585586
switch (true) {
586-
case 0 === strpos($scalar, '!!str '):
587+
case 0 === strncmp($scalar, '!!str ', 6):
587588
return (string) substr($scalar, 6);
588-
case 0 === strpos($scalar, '! '):
589+
case 0 === strncmp($scalar, '! ', 2):
589590
return substr($scalar, 2);
590-
case 0 === strpos($scalar, '!php/object'):
591+
case 0 === strncmp($scalar, '!php/object', 11):
591592
if (self::$objectSupport) {
592593
if (!isset($scalar[12])) {
593594
@trigger_error('Using the !php/object tag without a value is deprecated since Symfony 5.1.', E_USER_DEPRECATED);
@@ -603,7 +604,7 @@ private static function evaluateScalar(string $scalar, int $flags, array $refere
603604
}
604605

605606
return null;
606-
case 0 === strpos($scalar, '!php/const'):
607+
case 0 === strncmp($scalar, '!php/const', 10):
607608
if (self::$constantSupport) {
608609
if (!isset($scalar[11])) {
609610
@trigger_error('Using the !php/const tag without a value is deprecated since Symfony 5.1.', E_USER_DEPRECATED);
@@ -623,17 +624,17 @@ private static function evaluateScalar(string $scalar, int $flags, array $refere
623624
}
624625

625626
return null;
626-
case 0 === strpos($scalar, '!!float '):
627+
case 0 === strncmp($scalar, '!!float ', 8):
627628
return (float) substr($scalar, 8);
628-
case 0 === strpos($scalar, '!!binary '):
629+
case 0 === strncmp($scalar, '!!binary ', 9):
629630
return self::evaluateBinaryScalar(substr($scalar, 9));
630631
default:
631632
throw new ParseException(sprintf('The string "%s" could not be parsed as it uses an unsupported built-in tag.', $scalar), self::$parsedLineNumber, $scalar, self::$parsedFilename);
632633
}
633634

634635
// Optimize for returning strings.
635636
// no break
636-
case '+' === $scalar[0] || '-' === $scalar[0] || '.' === $scalar[0] || is_numeric($scalar[0]):
637+
case \in_array($scalar[0], ['+', '-', '.'], true) || is_numeric($scalar[0]):
637638
if (Parser::preg_match('{^[+-]?[0-9][0-9_]*$}', $scalar)) {
638639
$scalar = str_replace('_', '', (string) $scalar);
639640
}

src/Symfony/Component/Yaml/Parser.php

Lines changed: 31 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ class Parser
2828

2929
private $filename;
3030
private $offset = 0;
31+
private $numberOfParsedLines = 0;
3132
private $totalNumberOfLines;
3233
private $lines = [];
3334
private $currentLineNb = -1;
@@ -99,6 +100,7 @@ public function parse(string $value, int $flags = 0)
99100
}
100101
$this->lines = [];
101102
$this->currentLine = '';
103+
$this->numberOfParsedLines = 0;
102104
$this->refs = [];
103105
$this->skippedLineNumbers = [];
104106
$this->locallySkippedLineNumbers = [];
@@ -113,10 +115,11 @@ private function doParse(string $value, int $flags)
113115
$this->currentLine = '';
114116
$value = $this->cleanup($value);
115117
$this->lines = explode("\n", $value);
118+
$this->numberOfParsedLines = \count($this->lines);
116119
$this->locallySkippedLineNumbers = [];
117120

118121
if (null === $this->totalNumberOfLines) {
119-
$this->totalNumberOfLines = \count($this->lines);
122+
$this->totalNumberOfLines = $this->numberOfParsedLines;
120123
}
121124

122125
if (!$this->moveToNextLine()) {
@@ -291,7 +294,7 @@ private function doParse(string $value, int $flags)
291294
$subTag = null;
292295
if ($mergeNode) {
293296
// Merge keys
294-
} elseif (!isset($values['value']) || '' === $values['value'] || 0 === strpos($values['value'], '#') || (null !== $subTag = $this->getLineTag($values['value'], $flags)) || '<<' === $key) {
297+
} elseif (!isset($values['value']) || '' === $values['value'] || '#' === ($values['value'][0] ?? '') || (null !== $subTag = $this->getLineTag($values['value'], $flags)) || '<<' === $key) {
295298
// hash
296299
// if next line is less indented or equal, then it means that the current value is null
297300
if (!$this->isNextLineIndented() && !$this->isNextLineUnIndentedCollection()) {
@@ -430,7 +433,8 @@ private function doParse(string $value, int $flags)
430433
$value = '';
431434

432435
foreach ($this->lines as $line) {
433-
if ('' !== ltrim($line) && '#' === ltrim($line)[0]) {
436+
$trimmedLine = trim($line);
437+
if ('#' === ($trimmedLine[0] ?? '')) {
434438
continue;
435439
}
436440
// If the indentation is not consistent at offset 0, it is to be considered as a ParseError
@@ -442,22 +446,22 @@ private function doParse(string $value, int $flags)
442446
throw new ParseException('Mapping values are not allowed in multi-line blocks.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
443447
}
444448

445-
if ('' === trim($line)) {
449+
if ('' === $trimmedLine) {
446450
$value .= "\n";
447451
} elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) {
448452
$value .= ' ';
449453
}
450454

451-
if ('' !== trim($line) && '\\' === substr($line, -1)) {
455+
if ('' !== $trimmedLine && '\\' === $line[-1]) {
452456
$value .= ltrim(substr($line, 0, -1));
453-
} elseif ('' !== trim($line)) {
454-
$value .= trim($line);
457+
} elseif ('' !== $trimmedLine) {
458+
$value .= $trimmedLine;
455459
}
456460

457-
if ('' === trim($line)) {
461+
if ('' === $trimmedLine) {
458462
$previousLineWasNewline = true;
459463
$previousLineWasTerminatedWithBackslash = false;
460-
} elseif ('\\' === substr($line, -1)) {
464+
} elseif ('\\' === $line[-1]) {
461465
$previousLineWasNewline = false;
462466
$previousLineWasTerminatedWithBackslash = true;
463467
} else {
@@ -481,7 +485,7 @@ private function doParse(string $value, int $flags)
481485
$data = new TaggedValue($tag, $data);
482486
}
483487

484-
if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && !\is_object($data) && 'mapping' === $context) {
488+
if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && 'mapping' === $context && !\is_object($data)) {
485489
$object = new \stdClass();
486490

487491
foreach ($data as $key => $value) {
@@ -545,6 +549,10 @@ public function getRealCurrentLineNb(): int
545549
*/
546550
private function getCurrentLineIndentation(): int
547551
{
552+
if (' ' !== ($this->currentLine[0] ?? '')) {
553+
return 0;
554+
}
555+
548556
return \strlen($this->currentLine) - \strlen(ltrim($this->currentLine, ' '));
549557
}
550558

@@ -653,7 +661,7 @@ private function getNextEmbedBlock(int $indentation = null, bool $inSequence = f
653661
*/
654662
private function moveToNextLine(): bool
655663
{
656-
if ($this->currentLineNb >= \count($this->lines) - 1) {
664+
if ($this->currentLineNb >= $this->numberOfParsedLines - 1) {
657665
return false;
658666
}
659667

@@ -689,7 +697,7 @@ private function moveToPreviousLine(): bool
689697
*/
690698
private function parseValue(string $value, int $flags, string $context)
691699
{
692-
if (0 === strpos($value, '*')) {
700+
if ('*' === ($value[0] ?? '')) {
693701
if (false !== $pos = strpos($value, '#')) {
694702
$value = substr($value, 1, $pos - 2);
695703
} else {
@@ -750,7 +758,7 @@ private function parseValue(string $value, int $flags, string $context)
750758
$lines[] = trim($this->currentLine);
751759

752760
// quoted string values end with a line that is terminated with the quotation character
753-
if ('' !== $this->currentLine && substr($this->currentLine, -1) === $quotation) {
761+
if ('' !== $this->currentLine && $this->currentLine[-1] === $quotation) {
754762
break;
755763
}
756764
}
@@ -944,7 +952,7 @@ private function isCurrentLineEmpty(): bool
944952
*/
945953
private function isCurrentLineBlank(): bool
946954
{
947-
return '' == trim($this->currentLine, ' ');
955+
return '' === $this->currentLine || '' === trim($this->currentLine, ' ');
948956
}
949957

950958
/**
@@ -955,7 +963,7 @@ private function isCurrentLineBlank(): bool
955963
private function isCurrentLineComment(): bool
956964
{
957965
//checking explicitly the first char of the trim is faster than loops or strpos
958-
$ltrimmedLine = ltrim($this->currentLine, ' ');
966+
$ltrimmedLine = ' ' === $this->currentLine[0] ? ltrim($this->currentLine, ' ') : $this->currentLine;
959967

960968
return '' !== $ltrimmedLine && '#' === $ltrimmedLine[0];
961969
}
@@ -1041,7 +1049,7 @@ private function isNextLineUnIndentedCollection(): bool
10411049
*/
10421050
private function isStringUnIndentedCollectionItem(): bool
10431051
{
1044-
return '-' === rtrim($this->currentLine) || 0 === strpos($this->currentLine, '- ');
1052+
return 0 === strncmp($this->currentLine, '- ', 2) || '-' === rtrim($this->currentLine);
10451053
}
10461054

10471055
/**
@@ -1144,22 +1152,23 @@ private function parseQuotedString(string $yaml): ?string
11441152
$value = '';
11451153

11461154
for ($i = 0, $linesCount = \count($lines), $previousLineWasNewline = false, $previousLineWasTerminatedWithBackslash = false; $i < $linesCount; ++$i) {
1147-
if ('' === trim($lines[$i])) {
1155+
$trimmedLine = trim($lines[$i]);
1156+
if ('' === $trimmedLine) {
11481157
$value .= "\n";
11491158
} elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) {
11501159
$value .= ' ';
11511160
}
11521161

1153-
if ('' !== trim($lines[$i]) && '\\' === substr($lines[$i], -1)) {
1162+
if ('' !== $trimmedLine && '\\' === $lines[$i][-1]) {
11541163
$value .= ltrim(substr($lines[$i], 0, -1));
1155-
} elseif ('' !== trim($lines[$i])) {
1156-
$value .= trim($lines[$i]);
1164+
} elseif ('' !== $trimmedLine) {
1165+
$value .= $trimmedLine;
11571166
}
11581167

1159-
if ('' === trim($lines[$i])) {
1168+
if ('' === $trimmedLine) {
11601169
$previousLineWasNewline = true;
11611170
$previousLineWasTerminatedWithBackslash = false;
1162-
} elseif ('\\' === substr($lines[$i], -1)) {
1171+
} elseif ('\\' === $lines[$i][-1]) {
11631172
$previousLineWasNewline = false;
11641173
$previousLineWasTerminatedWithBackslash = true;
11651174
} else {

0 commit comments

Comments
 (0)
0