[DomCrawler] Optimize DomCrawler::relativize() · symfony/symfony@17757d8 · GitHub
[go: up one dir, main page]

Skip to content

Commit 17757d8

Browse files
[DomCrawler] Optimize DomCrawler::relativize()
1 parent 5b26e33 commit 17757d8

File tree

1 file changed

+44
-52
lines changed

1 file changed

+44
-52
lines changed

src/Symfony/Component/DomCrawler/Crawler.php

Lines changed: 44 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -860,18 +860,44 @@ private function relativize($xpath)
860860
// We cannot simply drop
861861
$nonMatchingExpression = 'a[name() = "b"]';
862862

863-
// Split any unions into individual expressions.
864-
foreach ($this->splitUnionParts($xpath) as $expression) {
865-
$expression = trim($expression);
866-
$parenthesis = '';
867-
868-
// If the union is inside some braces, we need to preserve the opening braces and apply
869-
// the change only inside it.
870-
if (preg_match('/^[\(\s*]+/', $expression, $matches)) {
871-
$parenthesis = $matches[0];
872-
$expression = substr($expression, strlen($parenthesis));
863+
$xpathLen = strlen($xpath);
864+
$openedBrackets = 0;
865+
$startPosition = strspn($xpath, " \t\n\r\0\x0B");
866+
867+
for ($i = $startPosition; $i <= $xpathLen; ++$i) {
868+
$i += strcspn($xpath, '"\'[]|', $i);
869+
870+
if ($i < $xpathLen) {
871+
switch ($xpath[$i]) {
872+
case '"':
873+
case "'":
874+
if (false === $i = strpos($xpath, $xpath[$i], $i + 1)) {
875+
return $xpath; // The XPath expression is invalid
876+
}
877+
continue 2;
878+
case '[':
879+
++$openedBrackets;
880+
continue 2;
881+
case ']':
882+
--$openedBrackets;
883+
continue 2;
884+
}
885+
}
886+
if ($openedBrackets) {
887+
continue;
873888
}
874889

890+
if ($startPosition < $xpathLen && '(' === $xpath[$startPosition]) {
891+
// If the union is inside some parentheses, we need to preserve the opening parentheses and apply
892+
// the change only inside it.
893+
$j = 1 + strspn($xpath, "( \t\n\r\0\x0B", $startPosition + 1);
894+
$parenthesis = substr($xpath, $startPosition, $j);
895+
$startPosition += $j;
896+
} else {
897+
$parenthesis = '';
898+
}
899+
$expression = rtrim(substr($xpath, $startPosition, $i - $startPosition));
900+
875901
// BC for Symfony 2.4 and lower, where elements were added in a fake _root parent
876902
if (0 === strpos($expression, '/_root/')) {
877903
$expression = './'.substr($expression, 7);
@@ -880,7 +906,7 @@ private function relativize($xpath)
880906
}
881907

882908
// add prefix before absolute element selector
883-
if (empty($expression)) {
909+
if ('' === $expression) {
884910
$expression = $nonMatchingExpression;
885911
} elseif (0 === strpos($expression, '//')) {
886912
$expression = 'descendant-or-self::'.substr($expression, 2);
@@ -898,58 +924,24 @@ private function relativize($xpath)
898924
// '.' is the fake root element in Symfony 2.4 and lower, which is excluded from results
899925
$expression = $nonMatchingExpression;
900926
} elseif (0 === strpos($expression, 'descendant::')) {
901-
$expression = 'descendant-or-self::'.substr($expression, strlen('descendant::'));
927+
$expression = 'descendant-or-self::'.substr($expression, 12);
902928
} elseif (preg_match('/^(ancestor|ancestor-or-self|attribute|following|following-sibling|namespace|parent|preceding|preceding-sibling)::/', $expression)) {
903929
// the fake root has no parent, preceding or following nodes, and no attributes (not even namespace attributes)
904930
$expression = $nonMatchingExpression;
905931
} elseif (0 !== strpos($expression, 'descendant-or-self::')) {
906932
$expression = 'self::'.$expression;
907933
}
908934
$expressions[] = $parenthesis.$expression;
909-
}
910935

911-
return implode(' | ', $expressions);
912-
}
913-
914-
/**
915-
* Splits the XPath into parts that are separated by the union operator.
916-
*
917-
* @param string $xpath
918-
*
919-
* @return string[]
920-
*/
921-
private function splitUnionParts($xpath)
922-
{
923-
// Split any unions into individual expressions. We need to iterate
924-
// through the string to correctly parse opening/closing quotes and
925-
// braces which is not possible with regular expressions.
926-
$unionParts = array();
927-
$inSingleQuotedString = false;
928-
$inDoubleQuotedString = false;
929-
$openedBrackets = 0;
930-
$lastUnion = 0;
931-
$xpathLength = strlen($xpath);
932-
for ($i = 0; $i < $xpathLength; ++$i) {
933-
$char = $xpath[$i];
934-
935-
if ($char === "'" && !$inDoubleQuotedString) {
936-
$inSingleQuotedString = !$inSingleQuotedString;
937-
} elseif ($char === '"' && !$inSingleQuotedString) {
938-
$inDoubleQuotedString = !$inDoubleQuotedString;
939-
} elseif (!$inSingleQuotedString && !$inDoubleQuotedString) {
940-
if ($char === '[') {
941-
++$openedBrackets;
942-
} elseif ($char === ']') {
943-
--$openedBrackets;
944-
} elseif ($char === '|' && $openedBrackets === 0) {
945-
$unionParts[] = substr($xpath, $lastUnion, $i - $lastUnion);
946-
$lastUnion = $i + 1;
947-
}
936+
if ($i === $xpathLen) {
937+
return implode(' | ', $expressions);
948938
}
939+
940+
$i += strspn($xpath, " \t\n\r\0\x0B", $i + 1);
941+
$startPosition = $i + 1;
949942
}
950-
$unionParts[] = substr($xpath, $lastUnion);
951943

952-
return $unionParts;
944+
return $xpath; // The XPath expression is invalid
953945
}
954946

955947
/**

0 commit comments

Comments
 (0)
0