bug #20235 [DomCrawler] Allow pipe (|) character in link tags when us… · symfony/symfony@8dee4be · GitHub
[go: up one dir, main page]

Skip to content

Commit 8dee4be

Browse files
bug #20235 [DomCrawler] Allow pipe (|) character in link tags when using Xpath expressions (klausi, nicolas-grekas)
This PR was merged into the 2.7 branch. Discussion ---------- [DomCrawler] Allow pipe (|) character in link tags when using Xpath expressions | Q | A | ------------- | --- | Branch? | master | Bug fix? | yes | New feature? | no | BC breaks? | no | Deprecations? | no | Tests pass? | yes | Fixed tickets | #20229 | License | MIT | Doc PR | - @klausi could you please validate this patch? Is it an improvement over yours? (sorry I don't have the proper use case to test.) Commits ------- 17757d8 [DomCrawler] Optimize DomCrawler::relativize() 5b26e33 [DomCrawler] Allow pipe (|) character in link tags when using Xpath expressions
2 parents 0e8b2a3 + 17757d8 commit 8dee4be

File tree

2 files changed

+49
-14
lines changed

2 files changed

+49
-14
lines changed

src/Symfony/Component/DomCrawler/Crawler.php

Lines changed: 45 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -856,22 +856,47 @@ private function relativize($xpath)
856856
{
857857
$expressions = array();
858858

859-
$unionPattern = '/\|(?![^\[]*\])/';
860859
// An expression which will never match to replace expressions which cannot match in the crawler
861860
// We cannot simply drop
862861
$nonMatchingExpression = 'a[name() = "b"]';
863862

864-
// Split any unions into individual expressions.
865-
foreach (preg_split($unionPattern, $xpath) as $expression) {
866-
$expression = trim($expression);
867-
$parenthesis = '';
863+
$xpathLen = strlen($xpath);
864+
$openedBrackets = 0;
865+
$startPosition = strspn($xpath, " \t\n\r\0\x0B");
868866

869-
// If the union is inside some braces, we need to preserve the opening braces and apply
870-
// the change only inside it.
871-
if (preg_match('/^[\(\s*]+/', $expression, $matches)) {
872-
$parenthesis = $matches[0];
873-
$expression = substr($expression, strlen($parenthesis));
867+
for ($i = $startPosition; $i <= $xpathLen; ++$i) {
868+
$i += strcspn($xpath, '"\'[]|', $i);
869+
870+
if ($i < $xpathLen) {
871+
switch ($xpath[$i]) {
872+
case '"':
873+
case "'":
874+
if (false === $i = strpos($xpath, $xpath[$i], $i + 1)) {
875+
return $xpath; // The XPath expression is invalid
876+
}
877+
continue 2;
878+
case '[':
879+
++$openedBrackets;
880+
continue 2;
881+
case ']':
882+
--$openedBrackets;
883+
continue 2;
884+
}
885+
}
886+
if ($openedBrackets) {
887+
continue;
888+
}
889+
890+
if ($startPosition < $xpathLen && '(' === $xpath[$startPosition]) {
891+
// If the union is inside some braces, we need to preserve the opening braces and apply
892+
// the change only inside it.
893+
$j = 1 + strspn($xpath, "( \t\n\r\0\x0B", $startPosition + 1);
894+
$parenthesis = substr($xpath, $startPosition, $j);
895+
$startPosition += $j;
896+
} else {
897+
$parenthesis = '';
874898
}
899+
$expression = rtrim(substr($xpath, $startPosition, $i - $startPosition));
875900

876901
// BC for Symfony 2.4 and lower, where elements were added in a fake _root parent
877902
if (0 === strpos($expression, '/_root/')) {
@@ -881,7 +906,7 @@ private function relativize($xpath)
881906
}
882907

883908
// add prefix before absolute element selector
884-
if (empty($expression)) {
909+
if ('' === $expression) {
885910
$expression = $nonMatchingExpression;
886911
} elseif (0 === strpos($expression, '//')) {
887912
$expression = 'descendant-or-self::'.substr($expression, 2);
@@ -899,17 +924,24 @@ private function relativize($xpath)
899924
// '.' is the fake root element in Symfony 2.4 and lower, which is excluded from results
900925
$expression = $nonMatchingExpression;
901926
} elseif (0 === strpos($expression, 'descendant::')) {
902-
$expression = 'descendant-or-self::'.substr($expression, strlen('descendant::'));
927+
$expression = 'descendant-or-self::'.substr($expression, 12);
903928
} elseif (preg_match('/^(ancestor|ancestor-or-self|attribute|following|following-sibling|namespace|parent|preceding|preceding-sibling)::/', $expression)) {
904929
// the fake root has no parent, preceding or following nodes and also no attributes (even no namespace attributes)
905930
$expression = $nonMatchingExpression;
906931
} elseif (0 !== strpos($expression, 'descendant-or-self::')) {
907932
$expression = 'self::'.$expression;
908933
}
909934
$expressions[] = $parenthesis.$expression;
935+
936+
if ($i === $xpathLen) {
937+
return implode(' | ', $expressions);
938+
}
939+
940+
$i += strspn($xpath, " \t\n\r\0\x0B", $i + 1);
941+
$startPosition = $i + 1;
910942
}
911943

912-
return implode(' | ', $expressions);
944+
return $xpath; // The XPath expression is invalid
913945
}
914946

915947
/**

src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ public function testFilterXpathComplexQueries()
387387
$this->assertCount(5, $crawler->filterXPath('(//a | //div)//img'));
388388
$this->assertCount(7, $crawler->filterXPath('((//a | //div)//img | //ul)'));
389389
$this->assertCount(7, $crawler->filterXPath('( ( //a | //div )//img | //ul )'));
390+
$this->assertCount(1, $crawler->filterXPath("//a[./@href][((./@id = 'Klausi|Claudiu' or normalize-space(string(.)) = 'Klausi|Claudiu' or ./@title = 'Klausi|Claudiu' or ./@rel = 'Klausi|Claudiu') or .//img[./@alt = 'Klausi|Claudiu'])]"));
390391
}
391392

392393
public function testFilterXPath()
@@ -548,7 +549,7 @@ public function testFilterXPathWithSelfAxes()
548549

549550
$this->assertCount(0, $crawler->filterXPath('self::a'), 'The fake root node has no "real" element name');
550551
$this->assertCount(0, $crawler->filterXPath('self::a/img'), 'The fake root node has no "real" element name');
551-
$this->assertCount(9, $crawler->filterXPath('self::*/a'));
552+
$this->assertCount(10, $crawler->filterXPath('self::*/a'));
552553
}
553554

554555
public function testFilter()
@@ -969,6 +970,8 @@ public function createTestCrawler($uri = null)
969970
970971
<a href="?get=param">GetLink</a>
971972
973+
<a href="/example">Klausi|Claudiu</a>
974+
972975
<form action="foo" id="FooFormId">
973976
<input type="text" value="TextValue" name="TextName" />
974977
<input type="submit" value="FooValue" name="FooName" id="FooId" />

0 commit comments

Comments
 (0)
0