[DomCrawler] add a value() method, normalize whitespaces by xabbuh · Pull Request #24412 · symfony/symfony · GitHub
[go: up one dir, main page]

Skip to content

[DomCrawler] add a value() method, normalize whitespaces #24412

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
add a value() method, normalize whitespaces
  • Loading branch information
xabbuh committed Oct 4, 2017
commit cdea8b20a79cb8b906b73f8d8a38126ca0e3f667
7 changes: 7 additions & 0 deletions src/Symfony/Component/DomCrawler/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
CHANGELOG
=========

3.4.0
-----

* Added an argument to the `Crawler::text()` method to opt in to whitespace normalization.
* Added the `Crawler::value()` method, which returns the raw node value, i.e. it behaves the same as `Crawler::text()`
called without the whitespace normalization argument.

3.1.0
-----

Expand Down
23 changes: 22 additions & 1 deletion src/Symfony/Component/DomCrawler/Crawler.php
Original file line number Diff line number Diff line change
Expand Up @@ -556,13 +556,34 @@ public function nodeName()
}

/**
* Returns the node value of the first node of the list.
* Returns the text of the first node of the list.
*
* Pass true as an argument to normalize whitespaces.
*
* @return string The node value
*
* @throws \InvalidArgumentException When current node is empty
*/
public function text()
{
$value = $this->value();

// argument to be deprecated in 4.1, whitespace normalization to become the default behavior
if (func_num_args() && func_get_arg(0)) {
$value = trim(preg_replace('/\s++/', ' ', $value));
}

return $value;
}

/**
* Returns the node value of the first node of the list.
*
* @return string The node value
*
* @throws \InvalidArgumentException When current node is empty
*/
public function value()
{
if (!$this->nodes) {
throw new \InvalidArgumentException('The current node list is empty.');
Expand Down
89 changes: 55 additions & 34 deletions src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public function testAdd()

$crawler = new Crawler();
$crawler->add('<html><body>Foo</body></html>');
$this->assertEquals('Foo', $crawler->filterXPath('//body')->text(), '->add() adds nodes from a string');
$this->assertEquals('Foo', $crawler->filterXPath('//body')->value(), '->add() adds nodes from a string');
}

/**
Expand Down Expand Up @@ -114,7 +114,7 @@ public function testAddHtmlContentCharset()
$crawler = new Crawler();
$crawler->addHtmlContent('<html><div class="foo">Tiếng Việt</html>', 'UTF-8');

$this->assertEquals('Tiếng Việt', $crawler->filterXPath('//div')->text());
$this->assertEquals('Tiếng Việt', $crawler->filterXPath('//div')->value());
}

public function testAddHtmlContentInvalidBaseTag()
Expand All @@ -131,7 +131,7 @@ public function testAddHtmlContentUnsupportedCharset()
$crawler = new Crawler();
$crawler->addHtmlContent(file_get_contents(__DIR__.'/Fixtures/windows-1250.html'), 'Windows-1250');

$this->assertEquals('Žťčýů', $crawler->filterXPath('//p')->text());
$this->assertEquals('Žťčýů', $crawler->filterXPath('//p')->value());
}

/**
Expand All @@ -143,7 +143,7 @@ public function testAddHtmlContentCharsetGbk()
//gbk encode of <html><p>中文</p></html>
$crawler->addHtmlContent(base64_decode('PGh0bWw+PHA+1tDOxDwvcD48L2h0bWw+'), 'gbk');

$this->assertEquals('中文', $crawler->filterXPath('//p')->text());
$this->assertEquals('中文', $crawler->filterXPath('//p')->value());
}

public function testAddHtmlContentWithErrors()
Expand Down Expand Up @@ -184,7 +184,7 @@ public function testAddXmlContentCharset()
$crawler = new Crawler();
$crawler->addXmlContent('<html><div class="foo">Tiếng Việt</div></html>', 'UTF-8');

$this->assertEquals('Tiếng Việt', $crawler->filterXPath('//div')->text());
$this->assertEquals('Tiếng Việt', $crawler->filterXPath('//div')->value());
}

public function testAddXmlContentWithErrors()
Expand Down Expand Up @@ -238,7 +238,7 @@ public function testAddContent()

$crawler = new Crawler();
$crawler->addContent('<html><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><span>中文</span></html>');
$this->assertEquals('中文', $crawler->filterXPath('//span')->text(), '->addContent() guess wrong charset');
$this->assertEquals('中文', $crawler->filterXPath('//span')->value(), '->addContent() guess wrong charset');
}

/**
Expand All @@ -248,7 +248,7 @@ public function testAddContentNonUtf8()
{
$crawler = new Crawler();
$crawler->addContent(iconv('UTF-8', 'SJIS', '<html><head><meta charset="Shift_JIS"></head><body>日本語</body></html>'));
$this->assertEquals('日本語', $crawler->filterXPath('//body')->text(), '->addContent() can recognize "Shift_JIS" in html5 meta charset tag');
$this->assertEquals('日本語', $crawler->filterXPath('//body')->value(), '->addContent() can recognize "Shift_JIS" in html5 meta charset tag');
}

public function testAddDocument()
Expand Down Expand Up @@ -304,14 +304,14 @@ public function testEq()
$this->assertNotSame($crawler, $crawler->eq(0), '->eq() returns a new instance of a crawler');
$this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->eq() returns a new instance of a crawler');

$this->assertEquals('Two', $crawler->eq(1)->text(), '->eq() returns the nth node of the list');
$this->assertEquals('Two', $crawler->eq(1)->value(), '->eq() returns the nth node of the list');
$this->assertCount(0, $crawler->eq(100), '->eq() returns an empty crawler if the nth node does not exist');
}

public function testEach()
{
$data = $this->createTestCrawler()->filterXPath('//ul[1]/li')->each(function ($node, $i) {
return $i.'-'.$node->text();
return $i.'-'.$node->value();
});

$this->assertEquals(array('0-One', '1-Two', '2-Three'), $data, '->each() executes an anonymous function on each node of the list');
Expand Down Expand Up @@ -384,14 +384,33 @@ public function testNodeName()

public function testText()
{
$this->assertEquals('One', $this->createTestCrawler()->filterXPath('//li')->text(), '->text() returns the node value of the first element of the node list');
$this->assertEquals("one two\nthree ", $this->createTestCrawler()->filterXPath('//div[@id="text-whitespaces"]')->text(), '->text() returns the node value of the first element of the node list');
}

try {
$this->createTestCrawler()->filterXPath('//ol')->text();
$this->fail('->text() throws an \InvalidArgumentException if the node list is empty');
} catch (\InvalidArgumentException $e) {
$this->assertTrue(true, '->text() throws an \InvalidArgumentException if the node list is empty');
}
public function testTextWithNormalizedWhitespaces()
{
$this->assertEquals('one two three', $this->createTestCrawler()->filterXPath('//div[@id="text-whitespaces"]')->text(true), '->text() returns the node value of the first element of the node list');
}

/**
* @expectedException \InvalidArgumentException
*/
public function testTextThrowsExceptionWhenNodeListIsEmpty()
{
$this->createTestCrawler()->filterXPath('//ol')->text(true);
}

public function testValue()
{
$this->assertEquals("one two\nthree ", $this->createTestCrawler()->filterXPath('//div[@id="text-whitespaces"]')->value(), '->value() returns the node value of the first element of the node list');
}

/**
* @expectedException \InvalidArgumentException
*/
public function testValueThrowsExceptionWhenNodeListIsEmpty()
{
$this->createTestCrawler()->filterXPath('//ol')->value();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be a separate test like testValueThrowsExceptionWhenNodeListEmpty. If the above call already fails with InvalidArgumentException, the test would still pass.

}

public function testHtml()
Expand Down Expand Up @@ -463,7 +482,7 @@ public function testFilterXPathWithDefaultNamespace()
{
$crawler = $this->createTestXmlCrawler()->filterXPath('//default:entry/default:id');
$this->assertCount(1, $crawler, '->filterXPath() automatically registers a namespace');
$this->assertSame('tag:youtube.com,2008:video:kgZRZmEc9j4', $crawler->text());
$this->assertSame('tag:youtube.com,2008:video:kgZRZmEc9j4', $crawler->value());
}

public function testFilterXPathWithCustomDefaultNamespace()
Expand All @@ -473,7 +492,7 @@ public function testFilterXPathWithCustomDefaultNamespace()
$crawler = $crawler->filterXPath('//x:entry/x:id');

$this->assertCount(1, $crawler, '->filterXPath() lets to override the default namespace prefix');
$this->assertSame('tag:youtube.com,2008:video:kgZRZmEc9j4', $crawler->text());
$this->assertSame('tag:youtube.com,2008:video:kgZRZmEc9j4', $crawler->value());
}

public function testFilterXPathWithNamespace()
Expand All @@ -486,7 +505,7 @@ public function testFilterXPathWithMultipleNamespaces()
{
$crawler = $this->createTestXmlCrawler()->filterXPath('//media:group/yt:aspectRatio');
$this->assertCount(1, $crawler, '->filterXPath() automatically registers multiple namespaces');
$this->assertSame('widescreen', $crawler->text());
$this->assertSame('widescreen', $crawler->value());
}

public function testFilterXPathWithManuallyRegisteredNamespace()
Expand All @@ -496,7 +515,7 @@ public function testFilterXPathWithManuallyRegisteredNamespace()

$crawler = $crawler->filterXPath('//m:group/yt:aspectRatio');
$this->assertCount(1, $crawler, '->filterXPath() uses manually registered namespace');
$this->assertSame('widescreen', $crawler->text());
$this->assertSame('widescreen', $crawler->value());
}

public function testFilterXPathWithAnUrl()
Expand All @@ -505,7 +524,7 @@ public function testFilterXPathWithAnUrl()

$crawler = $crawler->filterXPath('//media:category[@scheme="http://gdata.youtube.com/schemas/2007/categories.cat"]');
$this->assertCount(1, $crawler);
$this->assertSame('Music', $crawler->text());
$this->assertSame('Music', $crawler->value());
}

public function testFilterXPathWithFakeRoot()
Expand Down Expand Up @@ -622,7 +641,7 @@ public function testFilterWithDefaultNamespace()
{
$crawler = $this->createTestXmlCrawler()->filter('default|entry default|id');
$this->assertCount(1, $crawler, '->filter() automatically registers namespaces');
$this->assertSame('tag:youtube.com,2008:video:kgZRZmEc9j4', $crawler->text());
$this->assertSame('tag:youtube.com,2008:video:kgZRZmEc9j4', $crawler->value());
}

public function testFilterWithNamespace()
Expand All @@ -635,7 +654,7 @@ public function testFilterWithMultipleNamespaces()
{
$crawler = $this->createTestXmlCrawler()->filter('media|group yt|aspectRatio');
$this->assertCount(1, $crawler, '->filter() automatically registers namespaces');
$this->assertSame('widescreen', $crawler->text());
$this->assertSame('widescreen', $crawler->value());
}

public function testFilterWithDefaultNamespaceOnly()
Expand Down Expand Up @@ -901,7 +920,7 @@ public function testLast()
$this->assertNotSame($crawler, $crawler->last(), '->last() returns a new instance of a crawler');
$this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->last() returns a new instance of a crawler');

$this->assertEquals('Three', $crawler->last()->text());
$this->assertEquals('Three', $crawler->last()->value());
}

public function testFirst()
Expand All @@ -910,7 +929,7 @@ public function testFirst()
$this->assertNotSame($crawler, $crawler->first(), '->first() returns a new instance of a crawler');
$this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->first() returns a new instance of a crawler');

$this->assertEquals('One', $crawler->first()->text());
$this->assertEquals('One', $crawler->first()->value());
}

public function testSiblings()
Expand All @@ -921,13 +940,13 @@ public function testSiblings()

$nodes = $crawler->siblings();
$this->assertEquals(2, $nodes->count());
$this->assertEquals('One', $nodes->eq(0)->text());
$this->assertEquals('Three', $nodes->eq(1)->text());
$this->assertEquals('One', $nodes->eq(0)->value());
$this->assertEquals('Three', $nodes->eq(1)->value());

$nodes = $this->createTestCrawler()->filterXPath('//li')->eq(0)->siblings();
$this->assertEquals(2, $nodes->count());
$this->assertEquals('Two', $nodes->eq(0)->text());
$this->assertEquals('Three', $nodes->eq(1)->text());
$this->assertEquals('Two', $nodes->eq(0)->value());
$this->assertEquals('Three', $nodes->eq(1)->value());

try {
$this->createTestCrawler()->filterXPath('//ol')->siblings();
Expand All @@ -945,7 +964,7 @@ public function testNextAll()

$nodes = $crawler->nextAll();
$this->assertEquals(1, $nodes->count());
$this->assertEquals('Three', $nodes->eq(0)->text());
$this->assertEquals('Three', $nodes->eq(0)->value());

try {
$this->createTestCrawler()->filterXPath('//ol')->nextAll();
Expand All @@ -963,7 +982,7 @@ public function testPreviousAll()

$nodes = $crawler->previousAll();
$this->assertEquals(2, $nodes->count());
$this->assertEquals('Two', $nodes->eq(0)->text());
$this->assertEquals('Two', $nodes->eq(0)->value());

try {
$this->createTestCrawler()->filterXPath('//ol')->previousAll();
Expand All @@ -981,9 +1000,9 @@ public function testChildren()

$nodes = $crawler->children();
$this->assertEquals(3, $nodes->count());
$this->assertEquals('One', $nodes->eq(0)->text());
$this->assertEquals('Two', $nodes->eq(1)->text());
$this->assertEquals('Three', $nodes->eq(2)->text());
$this->assertEquals('One', $nodes->eq(0)->value());
$this->assertEquals('Two', $nodes->eq(1)->value());
$this->assertEquals('Three', $nodes->eq(2)->value());

try {
$this->createTestCrawler()->filterXPath('//ol')->children();
Expand Down Expand Up @@ -1161,6 +1180,8 @@ public function createTestCrawler($uri = null)
<div id="child2" xmlns:foo="http://example.com"></div>
</div>
<div id="sibling"><img /></div>
<div id="text-whitespaces">one two
three </div>
</body>
</html>
');
Expand Down
0