diff --git a/UPGRADE-3.0.md b/UPGRADE-3.0.md index 9252af3dac66a..af823eec53cc8 100644 --- a/UPGRADE-3.0.md +++ b/UPGRADE-3.0.md @@ -99,6 +99,18 @@ UPGRADE FROM 2.x to 3.0 removed: `ContainerBuilder::synchronize()`, `Definition::isSynchronized()`, and `Definition::setSynchronized()`. +### DomCrawler + + * The `Form` class don't extend `Link` anymore. An `UriElementInterface` + interface have been introduced since 2.7 and both `Form` and `Link` + implement it. The abstract URI parsing logic is in an + `AbstractUriElement` class. + * The `node` and `method` properties from `AbstractUriElement` are now + private. You can access them with `getNode()` and `getMethod()`. + * The method `AbstractUriElement::setNode()` have been removed, child + classes now have to provide a `findNode` method that returns the node + instead of setting the (now private) `node` property. + ### EventDispatcher * The interface `Symfony\Component\EventDispatcher\Debug\TraceableEventDispatcherInterface` diff --git a/src/Symfony/Component/DomCrawler/AbstractUriElement.php b/src/Symfony/Component/DomCrawler/AbstractUriElement.php new file mode 100644 index 0000000000000..ceff0b9cd2333 --- /dev/null +++ b/src/Symfony/Component/DomCrawler/AbstractUriElement.php @@ -0,0 +1,202 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\Component\DomCrawler; + +/** + * Any HTML element that can link to an URI. + * + * @author Fabien Potencier + * + * @api + */ +abstract class AbstractUriElement implements UriElementInterface +{ + /** + * @var string The URI of the page where the element is embedded (or the base href) + */ + protected $currentUri; + + /** + * @var \DOMElement + */ + private $node; + + /** + * @var string The method to use for the element URI + */ + private $method; + + /** + * Constructor. + * + * @param \DOMElement $node A \DOMElement instance + * @param string $currentUri The URI of the page where the element is embedded (or the base href) + * @param string $method The method to use for the element URI (get by default) + * + * @throws \InvalidArgumentException if the node is not a link + * + * @api + */ + public function __construct(\DOMElement $node, $currentUri, $method = 'GET') + { + if (!in_array(strtolower(substr($currentUri, 0, 4)), array('http', 'file'))) { + throw new \InvalidArgumentException(sprintf('Current URI must be an absolute URL ("%s").', $currentUri)); + } + + $this->node = $this->findNode($node); + $this->method = $method ? strtoupper($method) : null; + $this->currentUri = $currentUri; + } + + public function getNode() + { + return $this->node; + } + + public function getMethod() + { + return $this->method; + } + + public function getUri() + { + $uri = trim($this->getRawUri()); + + // absolute URL? + if (null !== parse_url($uri, PHP_URL_SCHEME)) { + return $uri; + } + + // empty URI + if (!$uri) { + return $this->currentUri; + } + + // an anchor + if ('#' === $uri[0]) { + return $this->cleanupAnchor($this->currentUri).$uri; + } + + $baseUri = $this->cleanupUri($this->currentUri); + + if ('?' === $uri[0]) { + return $baseUri.$uri; + } + + // absolute URL with relative schema + if (0 === strpos($uri, '//')) { + return preg_replace('#^([^/]*)//.*$#', '$1', $baseUri).$uri; + } + + $baseUri = preg_replace('#^(.*?//[^/]*)(?:\/.*)?$#', '$1', $baseUri); + + // absolute path + if ('/' === $uri[0]) { + return $baseUri.$uri; + } + + // relative path + $path = parse_url(substr($this->currentUri, strlen($baseUri)), PHP_URL_PATH); + $path = $this->canonicalizePath(substr($path, 0, strrpos($path, '/')).'/'.$uri); + + return $baseUri.('' === $path || '/' !== $path[0] ? '/' : '').$path; + } + + /** + * Returns raw URI data. + * + * @return string + */ + abstract protected function getRawUri(); + + /** + * Returns the canonicalized URI path (see RFC 3986, section 5.2.4). + * + * @param string $path URI path + * + * @return string + */ + protected function canonicalizePath($path) + { + if ('' === $path || '/' === $path) { + return $path; + } + + if ('.' === substr($path, -1)) { + $path = $path.'/'; + } + + $output = array(); + + foreach (explode('/', $path) as $segment) { + if ('..' === $segment) { + array_pop($output); + } elseif ('.' !== $segment) { + array_push($output, $segment); + } + } + + return implode('/', $output); + } + + /** + * Sets current \DOMElement instance. + * + * @param \DOMElement $node A \DOMElement instance + * @return \DOMElement + */ + abstract protected function findNode(\DOMElement $node); + + /** + * Removes the query string and the anchor from the given uri. + * + * @param string $uri The uri to clean + * + * @return string + */ + private function cleanupUri($uri) + { + return $this->cleanupQuery($this->cleanupAnchor($uri)); + } + + /** + * Remove the query string from the uri. + * + * @param string $uri + * + * @return string + */ + private function cleanupQuery($uri) + { + if (false !== $pos = strpos($uri, '?')) { + return substr($uri, 0, $pos); + } + + return $uri; + } + + /** + * Remove the anchor from the uri. + * + * @param string $uri + * + * @return string + */ + private function cleanupAnchor($uri) + { + if (false !== $pos = strpos($uri, '#')) { + return substr($uri, 0, $pos); + } + + return $uri; + } +} diff --git a/src/Symfony/Component/DomCrawler/CHANGELOG.md b/src/Symfony/Component/DomCrawler/CHANGELOG.md index 48fd323f8202c..47e674482f05a 100644 --- a/src/Symfony/Component/DomCrawler/CHANGELOG.md +++ b/src/Symfony/Component/DomCrawler/CHANGELOG.md @@ -1,6 +1,19 @@ CHANGELOG ========= +3.0.0 +----- + +* [BC BREAK] `Form` now extends `AbstractUriElement` instead of `Link`. +* [BC BREAK] The `node` and `method` properties of `AbstractUriElement` are now private (they already have public getters). +* [BC BREAK] Since `node` is private, `setNode` in child classes can't set the `node` property directly, thus `setNode` have been renamed to `findNode` and now returns the node instead of setting the property. + +2.7.0 +----- + +* All the URI parsing logic have been abstracted in the `AbstractUriElement` class. The `Link` class is now a child of `AbstractUriElement` which implements the new `UriElementInterface`, describing the common `getNode`, `getMethod` and `getUri` methods. +* Added an `Image` class to crawl images and parse their `src` attribute, and `selectImage`, `image`, `images` methods in `Crawler`, the image version of the equivalent `link` methods. + 2.5.0 ----- diff --git a/src/Symfony/Component/DomCrawler/Crawler.php b/src/Symfony/Component/DomCrawler/Crawler.php index 4f52f218b6710..d65bc92ed0823 100644 --- a/src/Symfony/Component/DomCrawler/Crawler.php +++ b/src/Symfony/Component/DomCrawler/Crawler.php @@ -690,6 +690,22 @@ public function selectLink($value) return $this->filterRelativeXPath($xpath); } + /** + * Selects images by alt value. + * + * @param string $value The image alt + * + * @return Crawler A new instance of Crawler with the filtered list of nodes + * + * @api + */ + public function selectImage($value) + { + $xpath = sprintf('descendant-or-self::img[contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)]', static::xpathLiteral(' '.$value.' ')); + + return $this->filterRelativeXPath($xpath); + } + /** * Selects a button by name or alt value for images. * @@ -748,6 +764,45 @@ public function links() return $links; } + /** + * Returns an Image object for the first node in the list. + * + * @param string $method The method for the image (get by default) + * + * @return Image An Image instance + * + * @throws \InvalidArgumentException If the current node list is empty + * + * @api + */ + public function image($method = 'get') + { + if (!count($this)) { + throw new \InvalidArgumentException('The current node list is empty.'); + } + + $node = $this->getNode(0); + + return new Image($node, $this->baseHref, $method); + } + + /** + * Returns an array of Image objects for the nodes in the list. + * + * @return Image[] An array of Image instances + * + * @api + */ + public function images() + { + $images = array(); + foreach ($this as $node) { + $images[] = new Image($node, $this->baseHref, 'get'); + } + + return $images; + } + /** * Returns a Form object for the first node in the list. * diff --git a/src/Symfony/Component/DomCrawler/Form.php b/src/Symfony/Component/DomCrawler/Form.php index 528ad36a3df93..52513b5251d53 100644 --- a/src/Symfony/Component/DomCrawler/Form.php +++ b/src/Symfony/Component/DomCrawler/Form.php @@ -21,7 +21,7 @@ * * @api */ -class Form extends Link implements \ArrayAccess +class Form extends AbstractUriElement implements \ArrayAccess { /** * @var \DOMElement @@ -58,7 +58,7 @@ public function __construct(\DOMElement $node, $currentUri, $method = null) */ public function getFormNode() { - return $this->node; + return $this->getNode(); } /** @@ -207,7 +207,7 @@ public function getUri() protected function getRawUri() { - return $this->node->getAttribute('action'); + return $this->getNode()->getAttribute('action'); } /** @@ -221,11 +221,11 @@ protected function getRawUri() */ public function getMethod() { - if (null !== $this->method) { - return $this->method; + if (null !== parent::getMethod()) { + return parent::getMethod(); } - return $this->node->getAttribute('method') ? strtoupper($this->node->getAttribute('method')) : 'GET'; + return $this->getNode()->getAttribute('method') ? strtoupper($this->getNode()->getAttribute('method')) : 'GET'; } /** @@ -368,9 +368,11 @@ public function disableValidation() * * @param \DOMElement $node A \DOMElement instance * + * @return \DOMElement + * * @throws \LogicException If given node is not a button or input or does not have a form ancestor */ - protected function setNode(\DOMElement $node) + protected function findNode(\DOMElement $node) { $this->button = $node; if ('button' === $node->nodeName || ('input' === $node->nodeName && in_array(strtolower($node->getAttribute('type')), array('submit', 'button', 'image')))) { @@ -381,9 +383,8 @@ protected function setNode(\DOMElement $node) if (null === $form) { throw new \LogicException(sprintf('The selected node has an invalid form attribute (%s).', $formId)); } - $this->node = $form; - return; + return $form; } // we loop until we find a form ancestor do { @@ -395,7 +396,7 @@ protected function setNode(\DOMElement $node) throw new \LogicException(sprintf('Unable to submit on a "%s" tag.', $node->nodeName)); } - $this->node = $node; + return $node; } /** @@ -409,7 +410,7 @@ private function initialize() { $this->fields = new FormFieldRegistry(); - $xpath = new \DOMXPath($this->node->ownerDocument); + $xpath = new \DOMXPath($this->getNode()->ownerDocument); // add submitted button if it has a valid name if ('form' !== $this->button->nodeName && $this->button->hasAttribute('name') && $this->button->getAttribute('name')) { @@ -433,18 +434,18 @@ private function initialize() } // find form elements corresponding to the current form - if ($this->node->hasAttribute('id')) { + if ($this->getNode()->hasAttribute('id')) { // corresponding elements are either descendants or have a matching HTML5 form attribute - $formId = Crawler::xpathLiteral($this->node->getAttribute('id')); + $formId = Crawler::xpathLiteral($this->getNode()->getAttribute('id')); $fieldNodes = $xpath->query(sprintf('descendant::input[@form=%s] | descendant::button[@form=%s] | descendant::textarea[@form=%s] | descendant::select[@form=%s] | //form[@id=%s]//input[not(@form)] | //form[@id=%s]//button[not(@form)] | //form[@id=%s]//textarea[not(@form)] | //form[@id=%s]//select[not(@form)]', $formId, $formId, $formId, $formId, $formId, $formId, $formId, $formId)); foreach ($fieldNodes as $node) { $this->addField($node); } } else { - // do the xpath query with $this->node as the context node, to only find descendant elements + // do the xpath query with $this->getNode() as the context node, to only find descendant elements // however, descendant elements with form attribute are not part of this form - $fieldNodes = $xpath->query('descendant::input[not(@form)] | descendant::button[not(@form)] | descendant::textarea[not(@form)] | descendant::select[not(@form)]', $this->node); + $fieldNodes = $xpath->query('descendant::input[not(@form)] | descendant::button[not(@form)] | descendant::textarea[not(@form)] | descendant::select[not(@form)]', $this->getNode()); foreach ($fieldNodes as $node) { $this->addField($node); } diff --git a/src/Symfony/Component/DomCrawler/Image.php b/src/Symfony/Component/DomCrawler/Image.php new file mode 100644 index 0000000000000..e79d3c196ac54 --- /dev/null +++ b/src/Symfony/Component/DomCrawler/Image.php @@ -0,0 +1,34 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\Component\DomCrawler; + +/** + * Image represents an HTML image (an HTML img tag). + * + * @api + */ +class Image extends AbstractUriElement +{ + protected function getRawUri() + { + return $this->getNode()->getAttribute('src'); + } + + protected function findNode(\DOMElement $node) + { + if ('img' !== $node->nodeName) { + throw new \LogicException(sprintf('Unable to visualize a "%s" tag.', $node->nodeName)); + } + + return $node; + } +} diff --git a/src/Symfony/Component/DomCrawler/Link.php b/src/Symfony/Component/DomCrawler/Link.php index 03149f2cee82f..1bb802726f57c 100644 --- a/src/Symfony/Component/DomCrawler/Link.php +++ b/src/Symfony/Component/DomCrawler/Link.php @@ -18,215 +18,19 @@ * * @api */ -class Link +class Link extends AbstractUriElement { - /** - * @var \DOMElement - */ - protected $node; - - /** - * @var string The method to use for the link - */ - protected $method; - - /** - * @var string The URI of the page where the link is embedded (or the base href) - */ - protected $currentUri; - - /** - * Constructor. - * - * @param \DOMElement $node A \DOMElement instance - * @param string $currentUri The URI of the page where the link is embedded (or the base href) - * @param string $method The method to use for the link (get by default) - * - * @throws \InvalidArgumentException if the node is not a link - * - * @api - */ - public function __construct(\DOMElement $node, $currentUri, $method = 'GET') - { - if (!in_array(strtolower(substr($currentUri, 0, 4)), array('http', 'file'))) { - throw new \InvalidArgumentException(sprintf('Current URI must be an absolute URL ("%s").', $currentUri)); - } - - $this->setNode($node); - $this->method = $method ? strtoupper($method) : null; - $this->currentUri = $currentUri; - } - - /** - * Gets the node associated with this link. - * - * @return \DOMElement A \DOMElement instance - */ - public function getNode() - { - return $this->node; - } - - /** - * Gets the method associated with this link. - * - * @return string The method - * - * @api - */ - public function getMethod() - { - return $this->method; - } - - /** - * Gets the URI associated with this link. - * - * @return string The URI - * - * @api - */ - public function getUri() - { - $uri = trim($this->getRawUri()); - - // absolute URL? - if (null !== parse_url($uri, PHP_URL_SCHEME)) { - return $uri; - } - - // empty URI - if (!$uri) { - return $this->currentUri; - } - - // an anchor - if ('#' === $uri[0]) { - return $this->cleanupAnchor($this->currentUri).$uri; - } - - $baseUri = $this->cleanupUri($this->currentUri); - - if ('?' === $uri[0]) { - return $baseUri.$uri; - } - - // absolute URL with relative schema - if (0 === strpos($uri, '//')) { - return preg_replace('#^([^/]*)//.*$#', '$1', $baseUri).$uri; - } - - $baseUri = preg_replace('#^(.*?//[^/]*)(?:\/.*)?$#', '$1', $baseUri); - - // absolute path - if ('/' === $uri[0]) { - return $baseUri.$uri; - } - - // relative path - $path = parse_url(substr($this->currentUri, strlen($baseUri)), PHP_URL_PATH); - $path = $this->canonicalizePath(substr($path, 0, strrpos($path, '/')).'/'.$uri); - - return $baseUri.('' === $path || '/' !== $path[0] ? '/' : '').$path; - } - - /** - * Returns raw URI data. - * - * @return string - */ protected function getRawUri() { - return $this->node->getAttribute('href'); - } - - /** - * Returns the canonicalized URI path (see RFC 3986, section 5.2.4). - * - * @param string $path URI path - * - * @return string - */ - protected function canonicalizePath($path) - { - if ('' === $path || '/' === $path) { - return $path; - } - - if ('.' === substr($path, -1)) { - $path = $path.'/'; - } - - $output = array(); - - foreach (explode('/', $path) as $segment) { - if ('..' === $segment) { - array_pop($output); - } elseif ('.' !== $segment) { - array_push($output, $segment); - } - } - - return implode('/', $output); + return $this->getNode()->getAttribute('href'); } - /** - * Sets current \DOMElement instance. - * - * @param \DOMElement $node A \DOMElement instance - * - * @throws \LogicException If given node is not an anchor - */ - protected function setNode(\DOMElement $node) + protected function findNode(\DOMElement $node) { if ('a' !== $node->nodeName && 'area' !== $node->nodeName && 'link' !== $node->nodeName) { throw new \LogicException(sprintf('Unable to navigate from a "%s" tag.', $node->nodeName)); } - $this->node = $node; - } - - /** - * Removes the query string and the anchor from the given uri. - * - * @param string $uri The uri to clean - * - * @return string - */ - private function cleanupUri($uri) - { - return $this->cleanupQuery($this->cleanupAnchor($uri)); - } - - /** - * Remove the query string from the uri. - * - * @param string $uri - * - * @return string - */ - private function cleanupQuery($uri) - { - if (false !== $pos = strpos($uri, '?')) { - return substr($uri, 0, $pos); - } - - return $uri; - } - - /** - * Remove the anchor from the uri. - * - * @param string $uri - * - * @return string - */ - private function cleanupAnchor($uri) - { - if (false !== $pos = strpos($uri, '#')) { - return substr($uri, 0, $pos); - } - - return $uri; + return $node; } } diff --git a/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php b/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php index 82045a1dd5ae3..3a1cc928fa9e6 100755 --- a/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php +++ b/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php @@ -672,6 +672,17 @@ public function testSelectLink() $this->assertCount(4, $crawler->selectLink('Bar'), '->selectLink() selects links by the node values'); } + public function testSelectImage() + { + $crawler = $this->createTestCrawler(); + $this->assertNotSame($crawler, $crawler->selectImage('Bar'), '->selectImage() returns a new instance of a crawler'); + $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->selectImage() returns a new instance of a crawler'); + + $this->assertCount(1, $crawler->selectImage('Fabien\'s Bar'), '->selectImage() selects images by alt attribute'); + $this->assertCount(2, $crawler->selectLink('Fabien"s Bar'), '->selectImage() selects images by alt attribute'); + $this->assertCount(1, $crawler->selectLink('\' Fabien"s Bar'), '->selectImage() selects images by alt attribute'); + } + public function testSelectButton() { $crawler = $this->createTestCrawler(); @@ -750,6 +761,21 @@ public function testLink() } } + public function testImage() + { + $crawler = $this->createTestCrawler('http://example.com/bar/')->selectImage('Bar'); + $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Image', $crawler->image(), '->image() returns an Image instance'); + + $this->assertEquals('POST', $crawler->image('post')->getMethod(), '->image() takes a method as its argument'); + + try { + $this->createTestCrawler()->filterXPath('//ol')->image(); + $this->fail('->image() throws an \InvalidArgumentException if the node list is empty'); + } catch (\InvalidArgumentException $e) { + $this->assertTrue(true, '->image() throws an \InvalidArgumentException if the node list is empty'); + } + } + public function testSelectLinkAndLinkFiltered() { $html = <<assertEquals(array(), $this->createTestCrawler()->filterXPath('//ol')->links(), '->links() returns an empty array if the node selection is empty'); } + public function testImages() + { + $crawler = $this->createTestCrawler('http://example.com/bar/')->selectImage('Bar'); + $this->assertInternalType('array', $crawler->images(), '->images() returns an array'); + + $this->assertCount(4, $crawler->images(), '->images() returns an array'); + $images = $crawler->images(); + $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Image', $images[0], '->images() returns an array of Image instances'); + + $this->assertEquals(array(), $this->createTestCrawler()->filterXPath('//ol')->links(), '->links() returns an empty array if the node selection is empty'); + } + public function testForm() { $testCrawler = $this->createTestCrawler('http://example.com/bar/'); diff --git a/src/Symfony/Component/DomCrawler/Tests/ImageTest.php b/src/Symfony/Component/DomCrawler/Tests/ImageTest.php new file mode 100644 index 0000000000000..71a74c31f1904 --- /dev/null +++ b/src/Symfony/Component/DomCrawler/Tests/ImageTest.php @@ -0,0 +1,48 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\Component\DomCrawler\Tests; + +use Symfony\Component\DomCrawler\Image; + +class ImageTest extends \PHPUnit_Framework_TestCase +{ + /** + * @expectedException \LogicException + */ + public function testConstructorWithANonImgTag() + { + $dom = new \DOMDocument(); + $dom->loadHTML('
'); + + new Image($dom->getElementsByTagName('div')->item(0), 'http://www.example.com/'); + } + + /** + * @dataProvider getGetUriTests + */ + public function testGetUri($url, $currentUri, $expected) + { + $dom = new \DOMDocument(); + $dom->loadHTML(sprintf('foo', $url)); + $image = new Image($dom->getElementsByTagName('img')->item(0), $currentUri); + + $this->assertEquals($expected, $image->getUri()); + } + + public function getGetUriTests() + { + return array( + array('/foo.png', 'http://localhost/bar/foo/', 'http://localhost/foo.png'), + array('foo.png', 'http://localhost/bar/foo/', 'http://localhost/bar/foo/foo.png'), + ); + } +} diff --git a/src/Symfony/Component/DomCrawler/UriElementInterface.php b/src/Symfony/Component/DomCrawler/UriElementInterface.php new file mode 100644 index 0000000000000..67889d18ac083 --- /dev/null +++ b/src/Symfony/Component/DomCrawler/UriElementInterface.php @@ -0,0 +1,45 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\Component\DomCrawler; + +/** + * Any HTML element that can link to an URI. + * + * @api + */ +interface UriElementInterface +{ + /** + * Gets the node associated with this element. + * + * @return \DOMElement A \DOMElement instance + */ + public function getNode(); + + /** + * Gets the method associated with this element. + * + * @return string The method + * + * @api + */ + public function getMethod(); + + /** + * Gets the URI associated with this element. + * + * @return string The URI + * + * @api + */ + public function getUri(); +}