add a value() method, normalize whitespaces · symfony/symfony@26860d6 · GitHub
[go: up one dir, main page]

Skip to content

Commit 26860d6

Browse files
committed
add a value() method, normalize whitespaces
1 parent cfbe2ff commit 26860d6

File tree

2 files changed

+78
-35
lines changed

2 files changed

+78
-35
lines changed

src/Symfony/Component/DomCrawler/Crawler.php

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -562,7 +562,29 @@ public function nodeName()
562562
*
563563
* @throws \InvalidArgumentException When current node is empty
564564
*/
565-
public function text()
565+
public function text($normalizeWhitespaces = false)
566+
{
567+
if (!$normalizeWhitespaces) {
568+
@trigger_error(sprintf('Not normalizing whitespace characters when using the %s() method is deprecated since version 3.4. The %s() method will return the first node\'s value with whitespaces being normalized in Symfony 4.0. Use the value() method instead if you do not want to have whitespaces normalized.', __METHOD__, __METHOD__), E_USER_DEPRECATED);
569+
}
570+
571+
$value = $this->value();
572+
573+
if ($normalizeWhitespaces) {
574+
$value = trim(preg_replace('/\s++/', ' ', $value));
575+
}
576+
577+
return $value;
578+
}
579+
580+
/**
581+
* Returns the node value of the first node of the list.
582+
*
583+
* @return string The node value
584+
*
585+
* @throws \InvalidArgumentException When current node is empty
586+
*/
587+
public function value()
566588
{
567589
if (!$this->nodes) {
568590
throw new \InvalidArgumentException('The current node list is empty.');

src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php

Lines changed: 55 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ public function testAdd()
6666

6767
$crawler = new Crawler();
6868
$crawler->add('<html><body>Foo</body></html>');
69-
$this->assertEquals('Foo', $crawler->filterXPath('//body')->text(), '->add() adds nodes from a string');
69+
$this->assertEquals('Foo', $crawler->filterXPath('//body')->value(), '->add() adds nodes from a string');
7070
}
7171

7272
/**
@@ -114,7 +114,7 @@ public function testAddHtmlContentCharset()
114114
$crawler = new Crawler();
115115
$crawler->addHtmlContent('<html><div class="foo">Tiếng Việt</html>', 'UTF-8');
116116

117-
$this->assertEquals('Tiếng Việt', $crawler->filterXPath('//div')->text());
117+
$this->assertEquals('Tiếng Việt', $crawler->filterXPath('//div')->value());
118118
}
119119

120120
public function testAddHtmlContentInvalidBaseTag()
@@ -131,7 +131,7 @@ public function testAddHtmlContentUnsupportedCharset()
131131
$crawler = new Crawler();
132132
$crawler->addHtmlContent(file_get_contents(__DIR__.'/Fixtures/windows-1250.html'), 'Windows-1250');
133133

134-
$this->assertEquals('Žťčýů', $crawler->filterXPath('//p')->text());
134+
$this->assertEquals('Žťčýů', $crawler->filterXPath('//p')->value());
135135
}
136136

137137
/**
@@ -143,7 +143,7 @@ public function testAddHtmlContentCharsetGbk()
143143
//gbk encode of <html><p>中文</p></html>
144144
$crawler->addHtmlContent(base64_decode('PGh0bWw+PHA+1tDOxDwvcD48L2h0bWw+'), 'gbk');
145145

146-
$this->assertEquals('中文', $crawler->filterXPath('//p')->text());
146+
$this->assertEquals('中文', $crawler->filterXPath('//p')->value());
147147
}
148148

149149
public function testAddHtmlContentWithErrors()
@@ -184,7 +184,7 @@ public function testAddXmlContentCharset()
184184
$crawler = new Crawler();
185185
$crawler->addXmlContent('<html><div class="foo">Tiếng Việt</div></html>', 'UTF-8');
186186

187-
$this->assertEquals('Tiếng Việt', $crawler->filterXPath('//div')->text());
187+
$this->assertEquals('Tiếng Việt', $crawler->filterXPath('//div')->value());
188188
}
189189

190190
public function testAddXmlContentWithErrors()
@@ -238,7 +238,7 @@ public function testAddContent()
238238

239239
$crawler = new Crawler();
240240
$crawler->addContent('<html><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><span>中文</span></html>');
241-
$this->assertEquals('中文', $crawler->filterXPath('//span')->text(), '->addContent() guess wrong charset');
241+
$this->assertEquals('中文', $crawler->filterXPath('//span')->value(), '->addContent() guess wrong charset');
242242
}
243243

244244
/**
@@ -248,7 +248,7 @@ public function testAddContentNonUtf8()
248248
{
249249
$crawler = new Crawler();
250250
$crawler->addContent(iconv('UTF-8', 'SJIS', '<html><head><meta charset="Shift_JIS"></head><body>日本語</body></html>'));
251-
$this->assertEquals('日本語', $crawler->filterXPath('//body')->text(), '->addContent() can recognize "Shift_JIS" in html5 meta charset tag');
251+
$this->assertEquals('日本語', $crawler->filterXPath('//body')->value(), '->addContent() can recognize "Shift_JIS" in html5 meta charset tag');
252252
}
253253

254254
public function testAddDocument()
@@ -304,14 +304,14 @@ public function testEq()
304304
$this->assertNotSame($crawler, $crawler->eq(0), '->eq() returns a new instance of a crawler');
305305
$this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->eq() returns a new instance of a crawler');
306306

307-
$this->assertEquals('Two', $crawler->eq(1)->text(), '->eq() returns the nth node of the list');
307+
$this->assertEquals('Two', $crawler->eq(1)->value(), '->eq() returns the nth node of the list');
308308
$this->assertCount(0, $crawler->eq(100), '->eq() returns an empty crawler if the nth node does not exist');
309309
}
310310

311311
public function testEach()
312312
{
313313
$data = $this->createTestCrawler()->filterXPath('//ul[1]/li')->each(function ($node, $i) {
314-
return $i.'-'.$node->text();
314+
return $i.'-'.$node->value();
315315
});
316316

317317
$this->assertEquals(array('0-One', '1-Two', '2-Three'), $data, '->each() executes an anonymous function on each node of the list');
@@ -382,16 +382,35 @@ public function testNodeName()
382382
}
383383
}
384384

385+
/**
386+
* @group legacy
387+
* @expectedException \InvalidArgumentException
388+
*/
385389
public function testText()
386390
{
387-
$this->assertEquals('One', $this->createTestCrawler()->filterXPath('//li')->text(), '->text() returns the node value of the first element of the node list');
391+
$this->assertEquals("one two\nthree ", $this->createTestCrawler()->filterXPath('//div[@id="text-whitespaces"]')->text(), '->text() returns the node value of the first element of the node list');
388392

389-
try {
390-
$this->createTestCrawler()->filterXPath('//ol')->text();
391-
$this->fail('->text() throws an \InvalidArgumentException if the node list is empty');
392-
} catch (\InvalidArgumentException $e) {
393-
$this->assertTrue(true, '->text() throws an \InvalidArgumentException if the node list is empty');
394-
}
393+
$this->createTestCrawler()->filterXPath('//ol')->text();
394+
}
395+
396+
/**
397+
* @expectedException \InvalidArgumentException
398+
*/
399+
public function testTextWithNormalizedWhitespaces()
400+
{
401+
$this->assertEquals("one two three", $this->createTestCrawler()->filterXPath('//div[@id="text-whitespaces"]')->text(true), '->text() returns the node value of the first element of the node list');
402+
403+
$this->createTestCrawler()->filterXPath('//ol')->text(true);
404+
}
405+
406+
/**
407+
* @expectedException \InvalidArgumentException
408+
*/
409+
public function testValue()
410+
{
411+
$this->assertEquals("one two\nthree ", $this->createTestCrawler()->filterXPath('//div[@id="text-whitespaces"]')->value(), '->value() returns the node value of the first element of the node list');
412+
413+
$this->createTestCrawler()->filterXPath('//ol')->value();
395414
}
396415

397416
public function testHtml()
@@ -463,7 +482,7 @@ public function testFilterXPathWithDefaultNamespace()
463482
{
464483
$crawler = $this->createTestXmlCrawler()->filterXPath('//default:entry/default:id');
465484
$this->assertCount(1, $crawler, '->filterXPath() automatically registers a namespace');
466-
$this->assertSame('tag:youtube.com,2008:video:kgZRZmEc9j4', $crawler->text());
485+
$this->assertSame('tag:youtube.com,2008:video:kgZRZmEc9j4', $crawler->value());
467486
}
468487

469488
public function testFilterXPathWithCustomDefaultNamespace()
@@ -473,7 +492,7 @@ public function testFilterXPathWithCustomDefaultNamespace()
473492
$crawler = $crawler->filterXPath('//x:entry/x:id');
474493

475494
$this->assertCount(1, $crawler, '->filterXPath() lets to override the default namespace prefix');
476-
$this->assertSame('tag:youtube.com,2008:video:kgZRZmEc9j4', $crawler->text());
495+
$this->assertSame('tag:youtube.com,2008:video:kgZRZmEc9j4', $crawler->value());
477496
}
478497

479498
public function testFilterXPathWithNamespace()
@@ -486,7 +505,7 @@ public function testFilterXPathWithMultipleNamespaces()
486505
{
487506
$crawler = $this->createTestXmlCrawler()->filterXPath('//media:group/yt:aspectRatio');
488507
$this->assertCount(1, $crawler, '->filterXPath() automatically registers multiple namespaces');
489-
$this->assertSame('widescreen', $crawler->text());
508+
$this->assertSame('widescreen', $crawler->value());
490509
}
491510

492511
public function testFilterXPathWithManuallyRegisteredNamespace()
@@ -496,7 +515,7 @@ public function testFilterXPathWithManuallyRegisteredNamespace()
496515

497516
$crawler = $crawler->filterXPath('//m:group/yt:aspectRatio');
498517
$this->assertCount(1, $crawler, '->filterXPath() uses manually registered namespace');
499-
$this->assertSame('widescreen', $crawler->text());
518+
$this->assertSame('widescreen', $crawler->value());
500519
}
501520

502521
public function testFilterXPathWithAnUrl()
@@ -505,7 +524,7 @@ public function testFilterXPathWithAnUrl()
505524

506525
$crawler = $crawler->filterXPath('//media:category[@scheme="http://gdata.youtube.com/schemas/2007/categories.cat"]');
507526
$this->assertCount(1, $crawler);
508-
$this->assertSame('Music', $crawler->text());
527+
$this->assertSame('Music', $crawler->value());
509528
}
510529

511530
public function testFilterXPathWithFakeRoot()
@@ -622,7 +641,7 @@ public function testFilterWithDefaultNamespace()
622641
{
623642
$crawler = $this->createTestXmlCrawler()->filter('default|entry default|id');
624643
$this->assertCount(1, $crawler, '->filter() automatically registers namespaces');
625-
$this->assertSame('tag:youtube.com,2008:video:kgZRZmEc9j4', $crawler->text());
644+
$this->assertSame('tag:youtube.com,2008:video:kgZRZmEc9j4', $crawler->value());
626645
}
627646

628647
public function testFilterWithNamespace()
@@ -635,7 +654,7 @@ public function testFilterWithMultipleNamespaces()
635654
{
636655
$crawler = $this->createTestXmlCrawler()->filter('media|group yt|aspectRatio');
637656
$this->assertCount(1, $crawler, '->filter() automatically registers namespaces');
638-
$this->assertSame('widescreen', $crawler->text());
657+
$this->assertSame('widescreen', $crawler->value());
639658
}
640659

641660
public function testFilterWithDefaultNamespaceOnly()
@@ -901,7 +920,7 @@ public function testLast()
901920
$this->assertNotSame($crawler, $crawler->last(), '->last() returns a new instance of a crawler');
902921
$this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->last() returns a new instance of a crawler');
903922

904-
$this->assertEquals('Three', $crawler->last()->text());
923+
$this->assertEquals('Three', $crawler->last()->value());
905924
}
906925

907926
public function testFirst()
@@ -910,7 +929,7 @@ public function testFirst()
910929
$this->assertNotSame($crawler, $crawler->first(), '->first() returns a new instance of a crawler');
911930
$this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->first() returns a new instance of a crawler');
912931

913-
$this->assertEquals('One', $crawler->first()->text());
932+
$this->assertEquals('One', $crawler->first()->value());
914933
}
915934

916935
public function testSiblings()
@@ -921,13 +940,13 @@ public function testSiblings()
921940

922941
$nodes = $crawler->siblings();
923942
$this->assertEquals(2, $nodes->count());
924-
$this->assertEquals('One', $nodes->eq(0)->text());
925-
$this->assertEquals('Three', $nodes->eq(1)->text());
943+
$this->assertEquals('One', $nodes->eq(0)->value());
944+
$this->assertEquals('Three', $nodes->eq(1)->value());
926945

927946
$nodes = $this->createTestCrawler()->filterXPath('//li')->eq(0)->siblings();
928947
$this->assertEquals(2, $nodes->count());
929-
$this->assertEquals('Two', $nodes->eq(0)->text());
930-
$this->assertEquals('Three', $nodes->eq(1)->text());
948+
$this->assertEquals('Two', $nodes->eq(0)->value());
949+
$this->assertEquals('Three', $nodes->eq(1)->value());
931950

932951
try {
933952
$this->createTestCrawler()->filterXPath('//ol')->siblings();
@@ -945,7 +964,7 @@ public function testNextAll()
945964

946965
$nodes = $crawler->nextAll();
947966
$this->assertEquals(1, $nodes->count());
948-
$this->assertEquals('Three', $nodes->eq(0)->text());
967+
$this->assertEquals('Three', $nodes->eq(0)->value());
949968

950969
try {
951970
$this->createTestCrawler()->filterXPath('//ol')->nextAll();
@@ -963,7 +982,7 @@ public function testPreviousAll()
963982

964983
$nodes = $crawler->previousAll();
965984
$this->assertEquals(2, $nodes->count());
966-
$this->assertEquals('Two', $nodes->eq(0)->text());
985+
$this->assertEquals('Two', $nodes->eq(0)->value());
967986

968987
try {
969988
$this->createTestCrawler()->filterXPath('//ol')->previousAll();
@@ -981,9 +1000,9 @@ public function testChildren()
9811000

9821001
$nodes = $crawler->children();
9831002
$this->assertEquals(3, $nodes->count());
984-
$this->assertEquals('One', $nodes->eq(0)->text());
985-
$this->assertEquals('Two', $nodes->eq(1)->text());
986-
$this->assertEquals('Three', $nodes->eq(2)->text());
1003+
$this->assertEquals('One', $nodes->eq(0)->value());
1004+
$this->assertEquals('Two', $nodes->eq(1)->value());
1005+
$this->assertEquals('Three', $nodes->eq(2)->value());
9871006

9881007
try {
9891008
$this->createTestCrawler()->filterXPath('//ol')->children();
@@ -1161,6 +1180,8 @@ public function createTestCrawler($uri = null)
11611180
<div id="child2" xmlns:foo="http://example.com"></div>
11621181
</div>
11631182
<div id="sibling"><img /></div>
1183+
<div id="text-whitespaces">one two
1184+
three </div>
11641185
</body>
11651186
</html>
11661187
');

0 commit comments

Comments
0 (0)
0