8000 [DomCrawler] Abstract URI logic and crawl images by valeriangalliat · Pull Request #13620 · symfony/symfony · GitHub
[go: up one dir, main page]

Skip to content

[DomCrawler] Abstract URI logic and crawl images #13620

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
[DomCrawler] Added ability to crawl images
A new Image class is added, extending AbstractUriElement, to leverage
URI methods for the HTML img src attribute.

Two methods are added to the Crawler class, image and images, that are
the equivalent of link and links for images.
  • Loading branch information
valeriangalliat committed Feb 7, 2015
commit 0efb7f587003371f79e0dd41bb4f0c5619770359
1 change: 1 addition & 0 deletions src/Symfony/Component/DomCrawler/CHANGELOG.md
8000
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ CHANGELOG
-----

* [BC BREAK] All the URI parsing logic has been abstracted into the `AbstractUriElement` class. The `Form` class no longer extends `Link`; it is now a child of `AbstractUriElement`. The `UriElementInterface` has been introduced for the common `getNode`, `getMethod` and `getUri` methods.
* Added an `Image` class to crawl images and parse their `src` attribute, and `selectImage`, `image`, `images` methods in `Crawler`, the image version of the equivalent `link` methods.

2.5.0
-----
Expand Down
55 changes: 55 additions & 0 deletions src/Symfony/Component/DomCrawler/Crawler.php
8000
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,22 @@ public function selectLink($value)
return $this->filterRelativeXPath($xpath);
}

/**
 * Narrows the current selection to <img> elements matched by their alt text.
 *
 * The alt attribute is whitespace-normalized and padded with one space on
 * each side before being searched for the equally padded $value, so the
 * value has to appear as a whole, space-delimited chunk of the alt text.
 *
 * @param string $value The alt text to look for
 *
 * @return Crawler A new Crawler instance holding only the matching nodes
 *
 * @api
 */
public function selectImage($value)
{
    // Quote the padded needle so it can be embedded safely in the XPath expression.
    $needle = static::xpathLiteral(' '.$value.' ');

    $expression = sprintf(
        'descendant-or-self::img[contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)]',
        $needle
    );

    return $this->filterRelativeXPath($expression);
}

/**
* Selects a button by name or alt value for images.
*
Expand Down Expand Up @@ -748,6 +764,45 @@ public function links()
return $links;
}

/**
 * Wraps the first node of the current selection in an Image object.
 *
 * @param string $method The method handed to the Image constructor (get by default)
 *
 * @return Image An Image instance built from the first node and the crawler's base href
 *
 * @throws \InvalidArgumentException If the current node list is empty
 *
 * @api
 */
public function image($method = 'get')
{
    // There is no first node to wrap when the selection is empty.
    if (0 === count($this)) {
        throw new \InvalidArgumentException('The current node list is empty.');
    }

    return new Image($this->getNode(0), $this->baseHref, $method);
}

/**
 * Wraps every node of the current selection in an Image object.
 *
 * Each Image is created with the crawler's base href and the 'get' method.
 * An empty selection yields an empty array.
 *
 * @return Image[] One Image instance per node, in iteration order
 *
 * @api
 */
public function images()
{
    $baseHref = $this->baseHref;
    $collected = array();

    foreach ($this as $element) {
        $collected[] = new Image($element, $baseHref, 'get');
    }

    return $collected;
}

/**
* Returns a Form object for the first node in the list.
*
Expand Down
34 changes: 34 additions & 0 deletions src/Symfony/Component/DomCrawler/Image.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace Symfony\Component\DomCrawler;

/**
 * Image wraps an HTML img element and exposes its src attribute as a URI.
 *
 * @api
 */
class Image extends AbstractUriElement
{
    /**
     * Returns the value of the node's src attribute, unmodified.
     *
     * @return string The raw src attribute value
     */
    protected function getRawUri()
    {
        return $this->node->getAttribute('src');
    }

    /**
     * Stores the node after checking that it is an img element.
     *
     * @param \DOMElement $node The element to wrap
     *
     * @throws \LogicException If the element is not an img tag
     */
    protected function setNode(\DOMElement $node)
    {
        if ('img' === $node->nodeName) {
            $this->node = $node;

            return;
        }

        throw new \LogicException(sprintf('Unable to visualize a "%s" tag.', $node->nodeName));
    }
}
38 changes: 38 additions & 0 deletions src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,17 @@ public function testSelectLink()
$this->assertCount(4, $crawler->selectLink('Bar'), '->selectLink() selects links by the node values');
}

/**
 * Checks that selectImage() returns a new Crawler and filters images by alt text.
 */
public function testSelectImage()
{
    $crawler = $this->createTestCrawler();
    $selected = $crawler->selectImage('Bar');

    // Both checks target the crawler returned by selectImage(), not the source crawler.
    $this->assertNotSame($crawler, $selected, '->selectImage() returns a new instance of a crawler');
    $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $selected, '->selectImage() returns a new instance of a crawler');

    // Every count below exercises selectImage(); two of them previously called
    // selectLink() by mistake. NOTE(review): expected counts are carried over
    // unchanged - confirm them against the createTestCrawler() fixture.
    $this->assertCount(1, $crawler->selectImage('Fabien\'s Bar'), '->selectImage() selects images by alt attribute');
    $this->assertCount(2, $crawler->selectImage('Fabien"s Bar'), '->selectImage() selects images by alt attribute');
    $this->assertCount(1, $crawler->selectImage('\' Fabien"s Bar'), '->selectImage() selects images by alt attribute');
}

public function testSelectButton()
{
$crawler = $this->createTestCrawler();
Expand Down Expand Up @@ -750,6 +761,21 @@ public function testLink()
}
}

/**
 * Checks that image() builds an Image from the first node, forwards the
 * method argument, and rejects an empty selection.
 */
public function testImage()
{
    $selection = $this->createTestCrawler('http://example.com/bar/')->selectImage('Bar');

    $defaultImage = $selection->image();
    $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Image', $defaultImage, '->image() returns an Image instance');

    $postImage = $selection->image('post');
    $this->assertEquals('POST', $postImage->getMethod(), '->image() takes a method as its argument');

    try {
        // '//ol' is expected to match nothing, so image() has no node to wrap.
        $emptySelection = $this->createTestCrawler()->filterXPath('//ol');
        $emptySelection->image();
        $this->fail('->image() throws an \InvalidArgumentException if the node list is empty');
    } catch (\InvalidArgumentException $exception) {
        $this->assertTrue(true, '->image() throws an \InvalidArgumentException if the node list is empty');
    }
}

public function testSelectLinkAndLinkFiltered()
{
$html = <<<HTML
Expand Down Expand Up @@ -800,6 +826,18 @@ public function testLinks()
$this->assertEquals(array(), $this->createTestCrawler()->filterXPath('//ol')->links(), '->links() returns an empty array if the node selection is empty');
}

/**
 * Checks that images() returns an array of Image instances, and an empty
 * array when the selection is empty.
 */
public function testImages()
{
    $crawler = $this->createTestCrawler('http://example.com/bar/')->selectImage('Bar');
    $images = $crawler->images();

    $this->assertInternalType('array', $images, '->images() returns an array');
    $this->assertCount(4, $images, '->images() returns an array');
    $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Image', $images[0], '->images() returns an array of Image instances');

    // The empty-selection case must go through images(); it previously called
    // links(), leaving this branch of images() untested.
    $this->assertEquals(array(), $this->createTestCrawler()->filterXPath('//ol')->images(), '->images() returns an empty array if the node selection is empty');
}

public function testForm()
{
$testCrawler = $this->createTestCrawler('http://example.com/bar/');
Expand Down
48 changes: 48 additions & 0 deletions src/Symfony/Component/DomCrawler/Tests/ImageTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace Symfony\Component\DomCrawler\Tests;

use Symfony\Component\DomCrawler\Image;

class ImageTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Building an Image from a non-img element must be rejected.
     *
     * @expectedException \LogicException
     */
    public function testConstructorWithANonImgTag()
    {
        $document = new \DOMDocument();
        $document->loadHTML('<html><div><div></html>');

        $firstDiv = $document->getElementsByTagName('div')->item(0);

        new Image($firstDiv, 'http://www.example.com/');
    }

    /**
     * The src attribute is resolved against the current URI.
     *
     * @dataProvider getGetUriTests
     */
    public function testGetUri($url, $currentUri, $expected)
    {
        $markup = sprintf('<html><img alt="foo" src="%s" /></html>', $url);

        $document = new \DOMDocument();
        $document->loadHTML($markup);

        $imgNode = $document->getElementsByTagName('img')->item(0);
        $image = new Image($imgNode, $currentUri);

        $this->assertEquals($expected, $image->getUri());
    }

    /**
     * Data sets for testGetUri: src value, current URI, expected absolute URI.
     */
    public function getGetUriTests()
    {
        $currentUri = 'http://localhost/bar/foo/';

        return array(
            // A src with a leading slash is resolved from the host root.
            array('/foo.png', $currentUri, 'http://localhost/foo.png'),
            // A src without a leading slash is resolved under the current path.
            array('foo.png', $currentUri, 'http://localhost/bar/foo/foo.png'),
        );
    }
}
0