[DomCrawler] fixed encoding when using addHtmlContent() (fixes #3881) · symfony/symfony@c9ebe67 · GitHub
[go: up one dir, main page]

Skip to content

Commit c9ebe67

Browse files
committed
[DomCrawler] fixed encoding when using addHtmlContent() (fixes #3881)
1 parent f14961b commit c9ebe67

File tree

2 files changed

+26
-0
lines changed

2 files changed

+26
-0
lines changed

src/Symfony/Component/DomCrawler/Crawler.php

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,10 @@ public function addHtmlContent($content, $charset = 'UTF-8')
129129
$dom = new \DOMDocument('1.0', $charset);
130130
$dom->validateOnParse = true;
131131

132+
if (function_exists('mb_convert_encoding')) {
133+
$content = mb_convert_encoding($content, 'HTML-ENTITIES', $charset);
134+
}
135+
132136
$current = libxml_use_internal_errors(true);
133137
@$dom->loadHTML($content);
134138
libxml_use_internal_errors($current);

src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,17 @@ public function testAddHtmlContent()
6969
$this->assertEquals('http://symfony.com/contact', $crawler->filterXPath('//a')->link()->getUri(), '->addHtmlContent() adds nodes from an HTML string');
7070
}
7171

72+
/**
73+
* @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
74+
*/
75+
public function testAddHtmlContentCharset()
76+
{
77+
$crawler = new Crawler();
78+
$crawler->addHtmlContent('<html><div class="foo">Tiếng Việt</html>', 'UTF-8');
79+
80+
$this->assertEquals('Tiếng Việt', $crawler->filterXPath('//div')->text());
81+
}
82+
7283
/**
7384
* @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
7485
*/
@@ -108,6 +119,17 @@ public function testAddXmlContent()
108119
$this->assertEquals('foo', $crawler->filterXPath('//div')->attr('class'), '->addXmlContent() adds nodes from an XML string');
109120
}
110121

122+
/**
123+
* @covers Symfony\Component\DomCrawler\Crawler::addXmlContent
124+
*/
125+
public function testAddXmlContentCharset()
126+
{
127+
$crawler = new Crawler();
128+
$crawler->addXmlContent('<html><div class="foo">Tiếng Việt</div></html>', 'UTF-8');
129+
130+
$this->assertEquals('Tiếng Việt', $crawler->filterXPath('//div')->text());
131+
}
132+
111133
/**
112134
* @covers Symfony\Component\DomCrawler\Crawler::addXmlContent
113135
*/

0 commit comments

Comments
 (0)
0