File tree 2 files changed +11
-14
lines changed
src/Symfony/Component/DomCrawler
2 files changed +11
-14
lines changed Original file line number Diff line number Diff line change @@ -156,24 +156,17 @@ public function addContent($content, $type = null)
156
156
return ;
157
157
}
158
158
159
- $ charset = null ;
160
- if (false !== $ pos = stripos ($ type , 'charset= ' )) {
161
- $ charset = substr ($ type , $ pos + 8 );
162
- if (false !== $ pos = strpos ($ charset , '; ' )) {
163
- $ charset = substr ($ charset , 0 , $ pos );
164
- }
165
- }
159
+ $ charset = preg_match ('//u ' , $ content ) ? 'UTF-8 ' : 'ISO-8859-1 ' ;
166
160
167
161
// http://www.w3.org/TR/encoding/#encodings
168
162
// http://www.w3.org/TR/REC-xml/#NT-EncName
169
- if ( null === $ charset &&
170
- preg_match ( ' /\<meta[^\>]+ charset *= *[" \' ]?([a-zA-Z\-0-9_:.]+)/i ' , $ content , $ matches )) {
171
- $ charset = $ matches [ 1 ];
172
- }
163
+ $ content = preg_replace_callback ( ' /(charset *= *[" \' ]?)([a-zA-Z\-0-9_:.]+)/i ' , function ( $ m ) use (& $ charset ) {
164
+ if ( ' charset= ' === $ this -> convertToHtmlEntities ( ' charset= ' , $ m [ 2 ] )) {
165
+ $charset = $ m [ 2 ];
166
+ }
173
167
174
- if (null === $ charset ) {
175
- $ charset = preg_match ('//u ' , $ content ) ? 'UTF-8 ' : 'ISO-8859-1 ' ;
176
- }
168
+ return $ m [1 ].$ charset ;
169
+ }, $ content , 1 );
177
170
178
171
if ('x ' === $ xmlMatches [1 ]) {
179
172
$ this ->addXmlContent ($ content , $ charset );
Original file line number Diff line number Diff line change @@ -187,6 +187,10 @@ public function testAddContent()
187
187
$ crawler = $ this ->createCrawler ();
188
188
$ crawler ->addContent ($ this ->getDoctype ().'<html><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><span>中文</span></html> ' );
189
189
$ this ->assertEquals ('中文 ' , $ crawler ->filterXPath ('//span ' )->text (), '->addContent() guess wrong charset ' );
190
+
191
+ $ crawler = $ this ->createCrawler ();
192
+ $ crawler ->addContent ($ this ->getDoctype ().'<html><meta http-equiv="Content-Type" content="text/html; charset=unicode" /><div class="foo"></html></html> ' );
193
+ $ this ->assertEquals ('foo ' , $ crawler ->filterXPath ('//div ' )->attr ('class ' ), '->addContent() ignores bad charset ' );
190
194
}
191
195
192
196
/**
You can’t perform that action at this time.
0 commit comments