bug #9323 [DomCrawler] fix #9321 Crawler::addHtmlContent add gbk encod… · symfony/symfony@e0126d9 · GitHub
[go: up one dir, main page]

Skip to content

Commit e0126d9

Browse files
committed
bug #9323 [DomCrawler]fix #9321 Crawler::addHtmlContent add gbk encoding support (bronze1man)
This PR was submitted for the 2.2 branch but it was merged into the 2.3 branch instead (closes #9323).

Discussion
----------

[DomCrawler]fix #9321 Crawler::addHtmlContent add gbk encoding support

| Q             | A
| ------------- | ---
| Bug fix?      | yes
| New feature?  | no
| BC breaks?    | no
| Deprecations? | no
| Tests pass?   | yes
| Fixed tickets | #9321
| License       | MIT
| Doc PR        | n/a

This is solution 1 in #9321 (comment)

Commits
-------

30af288 fix some cs
9f20b24 use restore_error_handler instead of set_error_handler($previous)
53cb6ad [DomCrawler]fix #9321 Crawler::addHtmlContent add gbk encoding support
2 parents 0285bfd + e26e564 commit e0126d9

File tree

2 files changed

+24
-2
lines changed

2 files changed

+24
-2
lines changed

src/Symfony/Component/DomCrawler/Crawler.php

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,18 @@ public function addHtmlContent($content, $charset = 'UTF-8')
147147
$dom = new \DOMDocument('1.0', $charset);
148148
$dom->validateOnParse = true;
149149

150-
if (function_exists('mb_convert_encoding') && in_array(strtolower($charset), array_map('strtolower', mb_list_encodings()))) {
151-
$content = mb_convert_encoding($content, 'HTML-ENTITIES', $charset);
150+
if (function_exists('mb_convert_encoding')) {
151+
$hasError = false;
152+
set_error_handler(function () use (&$hasError) {
153+
$hasError = true;
154+
});
155+
$tmpContent = @mb_convert_encoding($content, 'HTML-ENTITIES', $charset);
156+
157+
restore_error_handler();
158+
159+
if (!$hasError) {
160+
$content = $tmpContent;
161+
}
152162
}
153163

154164
@$dom->loadHTML($content);

src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,18 @@ public function testAddHtmlContentUnsupportedCharset()
112112
$this->assertEquals('Žťčýů', $crawler->filterXPath('//p')->text());
113113
}
114114

115+
/**
116+
* @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
117+
*/
118+
public function testAddHtmlContentCharsetGbk()
119+
{
120+
$crawler = new Crawler();
121+
//gbk encode of <html><p>中文</p></html>
122+
$crawler->addHtmlContent(base64_decode('PGh0bWw+PHA+1tDOxDwvcD48L2h0bWw+'), 'gbk');
123+
124+
$this->assertEquals('中文', $crawler->filterXPath('//p')->text());
125+
}
126+
115127
/**
116128
* @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
117129
*/

0 commit comments

Comments
 (0)
0