[DomCrawler] add a value() method, normalize whitespaces · symfony/symfony@76b78ee · GitHub
[go: up one dir, main page]

Skip to content

Commit 76b78ee

Browse files
committed
[DomCrawler] add a value() method, normalize whitespaces
1 parent f1dff5e commit 76b78ee

File tree

3 files changed

+79
-35
lines changed

3 files changed

+79
-35
lines changed

src/Symfony/Component/DomCrawler/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ CHANGELOG
55
-----
66

77
* Added `Form::getName()` method.
8+
* Added an argument to the `Crawler::text()` method to opt in to whitespace normalization.
9+
* Added the `Crawler::textContent()` method, which behaves the same as `Crawler::text()` without the whitespace normalization
10+
argument being passed.
11+
812

913
4.3.0
1014
-----

src/Symfony/Component/DomCrawler/Crawler.php

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,9 @@ public function nodeName()
554554
}
555555

556556
/**
557-
* Returns the node value of the first node of the list.
557+
* Returns the text of the first node of the list.
558+
*
559+
* Pass true as an argument to normalize whitespaces.
558560
*
559561
* @param mixed $default When provided and the current node is empty, this value is returned and no exception is thrown
560562
*
@@ -563,6 +565,25 @@ public function nodeName()
563565
* @throws \InvalidArgumentException When current node is empty
564566
*/
565567
public function text(/* $default = null */)
568+
{
569+
$textContent = $this->textContent();
570+
571+
// argument to be deprecated in 4.1, whitespace normalization to become the default behavior
572+
if (func_num_args() && func_get_arg(0)) {
573+
$textContent = trim(preg_replace('/\s++/', ' ', $textContent));
574+
}
575+
576+
return $textContent;
577+
}
578+
579+
/**
580+
* Returns the node value of the first node of the list.
581+
*
582+
* @return string The node value
583+
*
584+
* @throws \InvalidArgumentException When current node is empty
585+
*/
586+
public function textContent()
566587
{
567588
if (!$this->nodes) {
568589
if (0 < \func_num_args()) {

src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php

Lines changed: 53 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ public function testAdd()
7373

7474
$crawler = $this->createCrawler();
7575
$crawler->add($this->getDoctype().'<html><body>Foo</body></html>');
76-
$this->assertEquals('Foo', $crawler->filterXPath('//body')->text(), '->add() adds nodes from a string');
76+
$this->assertEquals('Foo', $crawler->filterXPath('//body')->textContent(), '->add() adds nodes from a string');
7777
}
7878

7979
/**
@@ -120,7 +120,7 @@ public function testAddHtmlContentCharset()
120120
$crawler = $this->createCrawler();
121121
$crawler->addHtmlContent($this->getDoctype().'<html><div class="foo">Tiếng Việt</html>', 'UTF-8');
122122

123-
$this->assertEquals('Tiếng Việt', $crawler->filterXPath('//div')->text());
123+
$this->assertEquals('Tiếng Việt', $crawler->filterXPath('//div')->textContent());
124124
}
125125

126126
public function testAddHtmlContentInvalidBaseTag()
@@ -140,7 +140,7 @@ public function testAddHtmlContentCharsetGbk()
140140
//gbk encode of <html><p>中文</p></html>
141141
$crawler->addHtmlContent($this->getDoctype().base64_decode('PGh0bWw+PHA+1tDOxDwvcD48L2h0bWw+'), 'gbk');
142142

143-
$this->assertEquals('中文', $crawler->filterXPath('//p')->text());
143+
$this->assertEquals('中文', $crawler->filterXPath('//p')->textContent());
144144
}
145145

146146
public function testAddXmlContent()
@@ -156,7 +156,7 @@ public function testAddXmlContentCharset()
156156
$crawler = $this->createCrawler();
157157
$crawler->addXmlContent($this->getDoctype().'<html><div class="foo">Tiếng Việt</div></html>', 'UTF-8');
158158

159-
$this->assertEquals('Tiếng Việt', $crawler->filterXPath('//div')->text());
159+
$this->assertEquals('Tiếng Việt', $crawler->filterXPath('//div')->textContent());
160160
}
161161

162162
public function testAddContent()
@@ -187,7 +187,7 @@ public function testAddContent()
187187

188188
$crawler = $this->createCrawler();
189189
$crawler->addContent($this->getDoctype().'<html><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><span>中文</span></html>');
190-
$this->assertEquals('中文', $crawler->filterXPath('//span')->text(), '->addContent() guess wrong charset');
190+
$this->assertEquals('中文', $crawler->filterXPath('//span')->textContent(), '->addContent() guess wrong charset');
191191
}
192192

193193
/**
@@ -197,7 +197,7 @@ public function testAddContentNonUtf8()
197197
{
198198
$crawler = $this->createCrawler();
199199
$crawler->addContent(iconv('UTF-8', 'SJIS', $this->getDoctype().'<html><head><meta charset="Shift_JIS"></head><body>日本語</body></html>'));
200-
$this->assertEquals('日本語', $crawler->filterXPath('//body')->text(), '->addContent() can recognize "Shift_JIS" in html5 meta charset tag');
200+
$this->assertEquals('日本語', $crawler->filterXPath('//body')->textContent(), '->addContent() can recognize "Shift_JIS" in html5 meta charset tag');
201201
}
202202

203203
public function testAddDocument()
@@ -253,14 +253,14 @@ public function testEq()
253253
$this->assertNotSame($crawler, $crawler->eq(0), '->eq() returns a new instance of a crawler');
254254
$this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->eq() returns a new instance of a crawler');
255255

256-
$this->assertEquals('Two', $crawler->eq(1)->text(), '->eq() returns the nth node of the list');
256+
$this->assertEquals('Two', $crawler->eq(1)->textContent(), '->eq() returns the nth node of the list');
257257
$this->assertCount(0, $crawler->eq(100), '->eq() returns an empty crawler if the nth node does not exist');
258258
}
259259

260260
public function testEach()
261261
{
262262
$data = $this->createTestCrawler()->filterXPath('//ul[1]/li')->each(function ($node, $i) {
263-
return $i.'-'.$node->text();
263+
return $i.'-'.$node->textContent();
264264
});
265265

266266
$this->assertEquals(['0-One', '1-Two', '2-Three'], $data, '->each() executes an anonymous function on each node of the list');
@@ -333,16 +333,33 @@ public function testNodeName()
333333

334334
public function testText()
335335
{
336-
$this->assertEquals('One', $this->createTestCrawler()->filterXPath('//li')->text(), '->text() returns the node value of the first element of the node list');
336+
$this->assertEquals("one two\nthree ", $this->createTestCrawler()->filterXPath('//div[@id="text-whitespaces"]')->text(), '->text() returns the node value of the first element of the node list');
337+
}
337338

338-
try {
339-
$this->createTestCrawler()->filterXPath('//ol')->text();
340-
$this->fail('->text() throws an \InvalidArgumentException if the node list is empty');
341-
} catch (\InvalidArgumentException $e) {
342-
$this->assertTrue(true, '->text() throws an \InvalidArgumentException if the node list is empty');
343-
}
339+
public function testTextWithNormalizedWhitespaces()
340+
{
341+
$this->assertEquals('one two three', $this->createTestCrawler()->filterXPath('//div[@id="text-whitespaces"]')->text(true), '->text() returns the node value of the first element of the node list');
342+
}
344343

345-
$this->assertSame('my value', $this->createTestCrawler(null)->filterXPath('//ol')->text('my value'));
344+
/**
345+
* @expectedException \InvalidArgumentException
346+
*/
347+
public function testTextThrowsExceptionWhenNodeListIsEmpty()
348+
{
349+
$this->createTestCrawler()->filterXPath('//ol')->text(true);
350+
}
351+
352+
public function testValue()
353+
{
354+
$this->assertEquals("one two\nthree ", $this->createTestCrawler()->filterXPath('//div[@id="text-whitespaces"]')->textContent(), '->value() returns the node value of the first element of the node list');
355+
}
356+
357+
/**
358+
* @expectedException \InvalidArgumentException
359+
*/
360+
public function testValueThrowsExceptionWhenNodeListIsEmpty()
361+
{
362+
$this->createTestCrawler()->filterXPath('//ol')->textContent();
346363
}
347364

348365
public function testHtml()
@@ -419,7 +436,7 @@ public function testFilterXPathWithDefaultNamespace()
419436
{
420437
$crawler = $this->createTestXmlCrawler()->filterXPath('//default:entry/default:id');
421438
$this->assertCount(1, $crawler, '->filterXPath() automatically registers a namespace');
422-
$this->assertSame('tag:youtube.com,2008:video:kgZRZmEc9j4', $crawler->text());
439+
$this->assertSame('tag:youtube.com,2008:video:kgZRZmEc9j4', $crawler->textContent());
423440
}
424441

425442
public function testFilterXPathWithCustomDefaultNamespace()
@@ -429,7 +446,7 @@ public function testFilterXPathWithCustomDefaultNamespace()
429446
$crawler = $crawler->filterXPath('//x:entry/x:id');
430447

431448
$this->assertCount(1, $crawler, '->filterXPath() lets to override the default namespace prefix');
432-
$this->assertSame('tag:youtube.com,2008:video:kgZRZmEc9j4', $crawler->text());
449+
$this->assertSame('tag:youtube.com,2008:video:kgZRZmEc9j4', $crawler->textContent());
433450
}
434451

435452
public function testFilterXPathWithNamespace()
@@ -442,7 +459,7 @@ public function testFilterXPathWithMultipleNamespaces()
442459
{
443460
$crawler = $this->createTestXmlCrawler()->filterXPath('//media:group/yt:aspectRatio');
444461
$this->assertCount(1, $crawler, '->filterXPath() automatically registers multiple namespaces');
445-
$this->assertSame('widescreen', $crawler->text());
462+
$this->assertSame('widescreen', $crawler->textContent());
446463
}
447464

448465
public function testFilterXPathWithManuallyRegisteredNamespace()
@@ -452,7 +469,7 @@ public function testFilterXPathWithManuallyRegisteredNamespace()
452469

453470
$crawler = $crawler->filterXPath('//m:group/yt:aspectRatio');
454471
$this->assertCount(1, $crawler, '->filterXPath() uses manually registered namespace');
455-
$this->assertSame('widescreen', $crawler->text());
472+
$this->assertSame('widescreen', $crawler->textContent());
456473
}
457474

458475
public function testFilterXPathWithAnUrl()
@@ -461,7 +478,7 @@ public function testFilterXPathWithAnUrl()
461478

462479
$crawler = $crawler->filterXPath('//media:category[@scheme="http://gdata.youtube.com/schemas/2007/categories.cat"]');
463480
$this->assertCount(1, $crawler);
464-
$this->assertSame('Music', $crawler->text());
481+
$this->assertSame('Music', $crawler->textContent());
465482
}
466483

467484
public function testFilterXPathWithFakeRoot()
@@ -578,7 +595,7 @@ public function testFilterWithDefaultNamespace()
578595
{
579596
$crawler = $this->createTestXmlCrawler()->filter('default|entry default|id');
580597
$this->assertCount(1, $crawler, '->filter() automatically registers namespaces');
581-
$this->assertSame('tag:youtube.com,2008:video:kgZRZmEc9j4', $crawler->text());
598+
$this->assertSame('tag:youtube.com,2008:video:kgZRZmEc9j4', $crawler->textContent());
582599
}
583600

584601
public function testFilterWithNamespace()
@@ -591,7 +608,7 @@ public function testFilterWithMultipleNamespaces()
591608
{
592609
$crawler = $this->createTestXmlCrawler()->filter('media|group yt|aspectRatio');
593610
$this->assertCount(1, $crawler, '->filter() automatically registers namespaces');
594-
$this->assertSame('widescreen', $crawler->text());
611+
$this->assertSame('widescreen', $crawler->textContent());
595612
}
596613

597614
public function testFilterWithDefaultNamespaceOnly()
@@ -854,7 +871,7 @@ public function testLast()
854871
$this->assertNotSame($crawler, $crawler->last(), '->last() returns a new instance of a crawler');
855872
$this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->last() returns a new instance of a crawler');
856873

857-
$this->assertEquals('Three', $crawler->last()->text());
874+
$this->assertEquals('Three', $crawler->last()->textContent());
858875
}
859876

860877
public function testFirst()
@@ -863,7 +880,7 @@ public function testFirst()
863880
$this->assertNotSame($crawler, $crawler->first(), '->first() returns a new instance of a crawler');
864881
$this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->first() returns a new instance of a crawler');
865882

866-
$this->assertEquals('One', $crawler->first()->text());
883+
$this->assertEquals('One', $crawler->first()->textContent());
867884
}
868885

869886
public function testSiblings()
@@ -874,13 +891,13 @@ public function testSiblings()
874891
10000
875892
$nodes = $crawler->siblings();
876893
$this->assertEquals(2, $nodes->count());
877-
$this->assertEquals('One', $nodes->eq(0)->text());
878-
$this->assertEquals('Three', $nodes->eq(1)->text());
894+
$this->assertEquals('One', $nodes->eq(0)->textContent());
895+
$this->assertEquals('Three', $nodes->eq(1)->textContent());
879896

880897
$nodes = $this->createTestCrawler()->filterXPath('//li')->eq(0)->siblings();
881898
$this->assertEquals(2, $nodes->count());
882-
$this->assertEquals('Two', $nodes->eq(0)->text());
883-
$this->assertEquals('Three', $nodes->eq(1)->text());
899+
$this->assertEquals('Two', $nodes->eq(0)->textContent());
900+
$this->assertEquals('Three', $nodes->eq(1)->textContent());
884901

885902
try {
886903
$this->createTestCrawler()->filterXPath('//ol')->siblings();
@@ -898,7 +915,7 @@ public function testNextAll()
898915

899916
$nodes = $crawler->nextAll();
900917
$this->assertEquals(1, $nodes->count());
901-
$this->assertEquals('Three', $nodes->eq(0)->text());
918+
$this->assertEquals('Three', $nodes->eq(0)->textContent());
902919

903920
try {
904921
$this->createTestCrawler()->filterXPath('//ol')->nextAll();
@@ -916,7 +933,7 @@ public function testPreviousAll()
916933

917934
$nodes = $crawler->previousAll();
918935
$this->assertEquals(2, $nodes->count());
919-
$this->assertEquals('Two', $nodes->eq(0)->text());
936+
$this->assertEquals('Two', $nodes->eq(0)->textContent());
920937

921938
try {
922939
$this->createTestCrawler()->filterXPath('//ol')->previousAll();
@@ -934,9 +951,9 @@ public function testChildren()
934951

935952
$nodes = $crawler->children();
936953
$this->assertEquals(3, $nodes->count());
937-
$this->assertEquals('One', $nodes->eq(0)->text());
938-
$this->assertEquals('Two', $nodes->eq(1)->text());
939-
$this->assertEquals('Three', $nodes->eq(2)->text());
954+
$this->assertEquals('One', $nodes->eq(0)->textContent());
955+
$this->assertEquals('Two', $nodes->eq(1)->textContent());
956+
$this->assertEquals('Three', $nodes->eq(2)->textContent());
940957

941958
try {
942959
$this->createTestCrawler()->filterXPath('//ol')->children();
@@ -1209,6 +1226,8 @@ public function createTestCrawler($uri = null)
12091226
<div id="child2" xmlns:foo="http://example.com"></div>
12101227
</div>
12111228
<div id="sibling"><img /></div>
1229+
<div id="text-whitespaces">one two
1230+
three </div>
12121231
</body>
12131232
</html>
12141233
');

0 commit comments

Comments
 (0)
0