Extracted code to expand an URI to `UriExpanderTrait` · symfony/symfony@2839414 · GitHub
[go: up one dir, main page]

Skip to content

Commit 2839414

Browse files
committed
Extracted code to expand an URI to UriExpanderTrait
1 parent ef30ef5 commit 2839414

File tree

4 files changed

+223
-72
lines changed

4 files changed

+223
-72
lines changed

src/Symfony/Component/DomCrawler/AbstractUriElement.php

Lines changed: 1 addition & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -80,46 +80,7 @@ public function getMethod()
8080
*/
8181
public function getUri()
8282
{
83-
$uri = trim($this->getRawUri());
84-
85-
// absolute URL?
86-
if (null !== parse_url($uri, PHP_URL_SCHEME)) {
87-
return $uri;
88-
}
89-
90-
// empty URI
91-
if (!$uri) {
92-
return $this->currentUri;
93-
}
94-
95-
// an anchor
96-
if ('#' === $uri[0]) {
97-
return $this->cleanupAnchor($this->currentUri).$uri;
98-
}
99-
100-
$baseUri = $this->cleanupUri($this->currentUri);
101-
102-
if ('?' === $uri[0]) {
103-
return $baseUri.$uri;
104-
}
105-
106-
// absolute URL with relative schema
107-
if (0 === strpos($uri, '//')) {
108-
return preg_replace('#^([^/]*)//.*$#', '$1', $baseUri).$uri;
109-
}
110-
111-
$baseUri = preg_replace('#^(.*?//[^/]*)(?:\/.*)?$#', '$1', $baseUri);
112-
113-
// absolute path
114-
if ('/' === $uri[0]) {
115-
return $baseUri.$uri;
116-
}
117-
118-
// relative path
119-
$path = parse_url(substr($this->currentUri, \strlen($baseUri)), PHP_URL_PATH);
120-
$path = $this->canonicalizePath(substr($path, 0, strrpos($path, '/')).'/'.$uri);
121-
122-
return $baseUri.('' === $path || '/' !== $path[0] ? '/' : '').$path;
83+
return UriExpander::expandUri($this->getRawUri(), $this->currentUri);
12384
}
12485

12586
/**
@@ -167,36 +128,4 @@ protected function canonicalizePath(string $path)
167128
* @throws \LogicException If given node is not an anchor
168129
*/
169130
abstract protected function setNode(\DOMElement $node);
170-
171-
/**
172-
* Removes the query string and the anchor from the given uri.
173-
*/
174-
private function cleanupUri(string $uri): string
175-
{
176-
return $this->cleanupQuery($this->cleanupAnchor($uri));
177-
}
178-
179-
/**
180-
* Remove the query string from the uri.
181-
*/
182-
private function cleanupQuery(string $uri): string
183-
{
184-
if (false !== $pos = strpos($uri, '?')) {
185-
return substr($uri, 0, $pos);
186-
}
187-
188-
return $uri;
189-
}
190-
191-
/**
192-
* Remove the anchor from the uri.
193-
*/
194-
private function cleanupAnchor(string $uri): string
195-
{
196-
if (false !== $pos = strpos($uri, '#')) {
197-
return substr($uri, 0, $pos);
198-
}
199-
200-
return $uri;
201-
}
202131
}

src/Symfony/Component/DomCrawler/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ CHANGELOG
55
-----
66

77
* Added an internal cache layer on top of the CssSelectorConverter
8+
* Added `UriExpander` to expand a URL according to another URL
89

910
5.0.0
1011
-----
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\Component\DomCrawler\Tests;
13+
14+
use PHPUnit\Framework\TestCase;
15+
use Symfony\Component\DomCrawler\UriExpander;
16+
17+
class UriExpanderTest extends TestCase
18+
{
19+
/**
20+
* @dataProvider provideExpandUriTests
21+
*/
22+
public function testExpandUri(string $uri, string $currentUri, string $expected)
23+
{
24+
$this->assertEquals($expected, UriExpander::expandUri($uri, $currentUri));
25+
}
26+
27+
public function provideExpandUriTests()
28+
{
29+
return [
30+
['/foo', 'http://localhost/bar/foo/', 'http://localhost/foo'],
31+
['/foo', 'http://localhost/bar/foo', 'http://localhost/foo'],
32+
['
33+
/foo', 'http://localhost/bar/foo/', 'http://localhost/foo'],
34+
['/foo
35+
', 'http://localhost/bar/foo', 'http://localhost/foo'],
36+
37+
['foo', 'http://localhost/bar/foo/', 'http://localhost/bar/foo/foo'],
38+
['foo', 'http://localhost/bar/foo', 'http://localhost/bar/foo'],
39+
40+
['', 'http://localhost/bar/', 'http://localhost/bar/'],
41+
['#', 'http://localhost/bar/', 'http://localhost/bar/#'],
42+
['#bar', 'http://localhost/bar?a=b', 'http://localhost/bar?a=b#bar'],
43+
['#bar', 'http://localhost/bar/#foo', 'http://localhost/bar/#bar'],
44+
['?a=b', 'http://localhost/bar#foo', 'http://localhost/bar?a=b'],
45+
['?a=b', 'http://localhost/bar/', 'http://localhost/bar/?a=b'],
46+
47+
['http://login.foo.com/foo', 'http://localhost/bar/', 'http://login.foo.com/foo'],
48+
['https://login.foo.com/foo', 'https://localhost/bar/', 'https://login.foo.com/foo'],
49+
['mailto:foo@bar.com', 'http://localhost/foo', 'mailto:foo@bar.com'],
50+
51+
// tests scheme-relative URLs (issue #7169)
52+
['//login.foo.com/foo', 'http://localhost/bar/', 'http://login.foo.com/foo'],
53+
['//login.foo.com/foo', 'https://localhost/bar/', 'https://login.foo.com/foo'],
54+
55+
['?foo=2', 'http://localhost?foo=1', 'http://localhost?foo=2'],
56+
['?foo=2', 'http://localhost/?foo=1', 'http://localhost/?foo=2'],
57+
['?foo=2', 'http://localhost/bar?foo=1', 'http://localhost/bar?foo=2'],
58+
['?foo=2', 'http://localhost/bar/?foo=1', 'http://localhost/bar/?foo=2'],
59+
['?bar=2', 'http://localhost?foo=1', 'http://localhost?bar=2'],
60+
61+
['foo', 'http://login.foo.com/bar/baz?/query/string', 'http://login.foo.com/bar/foo'],
62+
63+
['.', 'http://localhost/foo/bar/baz', 'http://localhost/foo/bar/'],
64+
['./', 'http://localhost/foo/bar/baz', 'http://localhost/foo/bar/'],
65+
['./foo', 'http://localhost/foo/bar/baz', 'http://localhost/foo/bar/foo'],
66+
['..', 'http://localhost/foo/bar/baz', 'http://localhost/foo/'],
67+
['../', 'http://localhost/foo/bar/baz', 'http://localhost/foo/'],
68+
['../foo', 'http://localhost/foo/bar/baz', 'http://localhost/foo/foo'],
69+
['../..', 'http://localhost/foo/bar/baz', 'http://localhost/'],
70+
['../../', 'http://localhost/foo/bar/baz', 'http://localhost/'],
71+
['../../foo', 'http://localhost/foo/bar/baz', 'http://localhost/foo'],
72+
['../../foo', 'http://localhost/bar/foo/', 'http://localhost/foo'],
73+
['../bar/../../foo', 'http://localhost/bar/foo/', 'http://localhost/foo'],
74+
['../bar/./../../foo', 'http://localhost/bar/foo/', 'http://localhost/foo'],
75+
['../../', 'http://localhost/', 'http://localhost/'],
76+
['../../', 'http://localhost', 'http://localhost/'],
77+
78+
['/foo', 'http://localhost?bar=1', 'http://localhost/foo'],
79+
['/foo', 'http://localhost#bar', 'http://localhost/foo'],
80+
['/foo', 'file:///', 'file:///foo'],
81+
['/foo', 'file:///bar/baz', 'file:///foo'],
82+
['foo', 'file:///', 'file:///foo'],
83+
['foo', 'file:///bar/baz', 'file:///bar/foo'],
84+
];
85+
}
86+
}
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\Component\DomCrawler;
13+
14+
/**
15+
* Expands a URI according to a current URI.
16+
*
17+
* @author Fabien Potencier <fabien@symfony.com>
18+
* @author Grégoire Pineau <lyrixx@lyrixx.info>
19+
*/
20+
class UriExpander
21+
{
22+
/**
23+
* Expands a URI according to a current URI.
24+
*
25+
* For example if $uri=/foo/bar and $currentUri=https://symfony.com it will
26+
* return https://symfony.com/foo/bar
27+
*
28+
* If $uri is not absolute, you must pass an absolute $currentUri; a null $currentUri throws an \InvalidArgumentException.
29+
*/
30+
public static function expandUri(string $uri, ?string $currentUri): string
31+
{
32+
$uri = trim($uri);
33+
34+
// absolute URL?
35+
if (null !== parse_url($uri, PHP_URL_SCHEME)) {
36+
return $uri;
37+
}
38+
39+
if (null === $currentUri) {
40+
throw new \InvalidArgumentException('The URI is relative, so you must define its base URI passing an absolute URL.');
41+
}
42+
43+
// empty URI
44+
if (!$uri) {
45+
return $currentUri;
46+
}
47+
48+
// an anchor
49+
if ('#' === $uri[0]) {
50+
return self::cleanupAnchor($currentUri).$uri;
51+
}
52+
53+
$baseUri = self::cleanupUri($currentUri);
54+
55+
if ('?' === $uri[0]) {
56+
return $baseUri.$uri;
57+
}
58+
59+
// scheme-relative URL (starts with "//"): reuse the scheme of the current URI
60+
if (0 === strpos($uri, '//')) {
61+
return preg_replace('#^([^/]*)//.*$#', '$1', $baseUri).$uri;
62+
}
63+
64+
$baseUri = preg_replace('#^(.*?//[^/]*)(?:\/.*)?$#', '$1', $baseUri);
65+
66+
// absolute path
67+
if ('/' === $uri[0]) {
68+
return $baseUri.$uri;
69+
}
70+
71+
// relative path
72+
$path = parse_url(substr($currentUri, \strlen($baseUri)), PHP_URL_PATH);
73+
$path = self::canonicalizePath(substr($path, 0, strrpos($path, '/')).'/'.$uri);
74+
75+
return $baseUri.('' === $path || '/' !== $path[0] ? '/' : '').$path;
76+
}
77+
78+
/**
79+
* Returns the canonicalized URI path (see RFC 3986, section 5.2.4).
80+
*/
81+
private static function canonicalizePath(string $path): string
82+
{
83+
if ('' === $path || '/' === $path) {
84+
return $path;
85+
}
86+
87+
if ('.' === substr($path, -1)) {
88+
$path .= '/';
89+
}
90+
91+
$output = [];
92+
93+
foreach (explode('/', $path) as $segment) {
94+
if ('..' === $segment) {
95+
array_pop($output);
96+
} elseif ('.' !== $segment) {
97+
$output[] = $segment;
98+
}
99+
}
100+
101+
return implode('/', $output);
102+
}
103+
104+
/**
105+
* Removes the query string and the anchor from the given uri.
106+
*/
107+
private static function cleanupUri(string $uri): string
108+
{
109+
return self::cleanupQuery(self::cleanupAnchor($uri));
110+
}
111+
112+
/**
113+
* Removes the query string from the URI.
114+
*/
115+
private static function cleanupQuery(string $uri): string
116+
{
117+
if (false !== $pos = strpos($uri, '?')) {
118+
return substr($uri, 0, $pos);
119+
}
120+
121+
return $uri;
122+
}
123+
124+
/**
125+
* Removes the anchor from the URI.
126+
*/
127+
private static function cleanupAnchor(string $uri): string
128+
{
129+
if (false !== $pos = strpos($uri, '#')) {
130+
return substr($uri, 0, $pos);
131+
}
132+
133+
return $uri;
134+
}
135+
}

0 commit comments

Comments
 (0)
0