merged branch symfony/domcrawler-encoding (PR #4214) · symfony/symfony@919604a · GitHub
[go: up one dir, main page]

Skip to content

Commit 919604a

Browse files
committed
merged branch symfony/domcrawler-encoding (PR #4214)
Commits ------- c9ebe67 [DomCrawler] fixed encoding when using addHtmlContent() (fixes #3881) Discussion ---------- [DomCrawler] fixed encoding when using addHtmlContent() (fixes #3881) After looking around, it is clear that loadHtml() resets the encoding set on the DomDocument instance. So, the only workaround that actually works (and which is not an ugly hack) is to use `mb_convert_encoding` when it exists. --------------------------------------------------------------------------- by Seldaek at 2012-05-07T12:38:43Z +1 (Side note: Using your fork of symfony for PRs would be good I think, otherwise it creates noisy versions on packagist.)
2 parents 1dd92b6 + c9ebe67 commit 919604a

File tree

2 files changed

+26
-0
lines changed

2 files changed

+26
-0
lines changed

src/Symfony/Component/DomCrawler/Crawler.php

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -129,6 +129,10 @@ public function addHtmlContent($content, $charset = 'UTF-8')
129129
$dom = new \DOMDocument('1.0', $charset);
130130
$dom->validateOnParse = true;
131131

132+
if (function_exists('mb_convert_encoding')) {
133+
$content = mb_convert_encoding($content, 'HTML-ENTITIES', $charset);
134+
}
135+
132136
$current = libxml_use_internal_errors(true);
133137
@$dom->loadHTML($content);
134138
libxml_use_internal_errors($current);

src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,17 @@ public function testAddHtmlContent()
6969
$this->assertEquals('http://symfony.com/contact', $crawler->filterXPath('//a')->link()->getUri(), '->addHtmlContent() adds nodes from an HTML string');
7070
}
7171

72+
/**
73+
* @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
74+
*/
75+
public function testAddHtmlContentCharset()
76+
{
77+
$crawler = new Crawler();
78+
$crawler->addHtmlContent('<html><div class="foo">Tiếng Việt</html>', 'UTF-8');
79+
80+
$this->assertEquals('Tiếng Việt', $crawler->filterXPath('//div')->text());
81+
}
82+
7283
/**
7384
* @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
7485
*/
@@ -108,6 +119,17 @@ public function testAddXmlContent()
108119
$this->assertEquals('foo', $crawler->filterXPath('//div')->attr('class'), '->addXmlContent() adds nodes from an XML string');
109120
}
110121

122+
/**
123+
* @covers Symfony\Component\DomCrawler\Crawler::addXmlContent
124+
*/
125+
public function testAddXmlContentCharset()
126+
{
127+
$crawler = new Crawler();
128+
$crawler->addXmlContent('<html><div class="foo">Tiếng Việt</div></html>', 'UTF-8');
129+
130+
$this->assertEquals('Tiếng Việt', $crawler->filterXPath('//div')->text());
131+
}
132+
111133
/**
112134
* @covers Symfony\Component\DomCrawler\Crawler::addXmlContent
113135
*/

0 commit comments

Comments
 (0)
0