[DomCrawler] added a way to get parsing errors for Crawler::addHtmlCo… · symfony/form@a57a4af · GitHub
[go: up one dir, main page]

Skip to content

Commit a57a4af

Browse files
committed
[DomCrawler] added a way to get parsing errors for Crawler::addHtmlContent() and Crawler::addXmlContent() via libxml functions
1 parent 382a421 commit a57a4af

File tree

3 files changed

+75
-0
lines changed

3 files changed

+75
-0
lines changed

CHANGELOG-2.1.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ To get the diff between two versions, go to https://github.com/symfony/symfony/c
3535

3636
### DomCrawler
3737

38+
* added a way to get parsing errors for Crawler::addHtmlContent() and Crawler::addXmlContent() via libxml functions
3839
* added support for submitting a form without a submit button
3940

4041
### Finder

src/Symfony/Component/DomCrawler/Crawler.php

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,13 @@ public function addContent($content, $type = null)
107107
/**
108108
* Adds an HTML content to the list of nodes.
109109
*
110+
* The libxml errors are disabled when the content is parsed.
111+
*
112+
* If you want to get parsing errors, be sure to enable
113+
* internal errors via libxml_use_internal_errors(true)
114+
* and then, get the errors via libxml_get_errors(). Be
115+
* sure to clear errors with libxml_clear_errors() afterward.
116+
*
110117
* @param string $content The HTML content
111118
* @param string $charset The charset
112119
*
@@ -117,7 +124,10 @@ public function addHtmlContent($content, $charset = 'UTF-8')
117124
$dom = new \DOMDocument('1.0', $charset);
118125
$dom->validateOnParse = true;
119126

127+
$current = libxml_use_internal_errors(true);
120128
@$dom->loadHTML($content);
129+
libxml_use_internal_errors($current);
130+
121131
$this->addDocument($dom);
122132

123133
$base = $this->filter('base')->extract(array('href'));
@@ -130,6 +140,13 @@ public function addHtmlContent($content, $charset = 'UTF-8')
130140
/**
131141
* Adds an XML content to the list of nodes.
132142
*
143+
* The libxml errors are disabled when the content is parsed.
144+
*
145+
* If you want to get parsing errors, be sure to enable
146+
* internal errors via libxml_use_internal_errors(true)
147+
* and then, get the errors via libxml_get_errors(). Be
148+
* sure to clear errors with libxml_clear_errors() afterward.
149+
*
133150
* @param string $content The XML content
134151
* @param string $charset The charset
135152
*
@@ -141,7 +158,10 @@ public function addXmlContent($content, $charset = 'UTF-8')
141158
$dom->validateOnParse = true;
142159

143160
// remove the default namespace to make XPath expressions simpler
161+
$current = libxml_use_internal_errors(true);
144162
@$dom->loadXML(str_replace('xmlns', 'ns', $content));
163+
libxml_use_internal_errors($current);
164+
145165
$this->addDocument($dom);
146166
}
147167

tests/Symfony/Tests/Component/DomCrawler/CrawlerTest.php

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,34 @@ public function testAddHtmlContent()
6969
$this->assertEquals('http://symfony.com/contact', $crawler->filter('a')->link()->getUri(), '->addHtmlContent() adds nodes from an HTML string');
7070
}
7171

72+
/**
73+
* @covers Symfony\Component\DomCrawler\Crawler::addHtmlContent
74+
*/
75+
public function testAddHtmlContentWithErrors()
76+
{
77+
libxml_use_internal_errors(true);
78+
79+
$crawler = new Crawler();
80+
$crawler->addHtmlContent(<<<EOF
81+
<!DOCTYPE html>
82+
<html>
83+
<head>
84+
</head>
85+
<body>
86+
<nav><a href="#"><a href="#"></nav>
87+
</body>
88+
</html>
89+
EOF
90+
, 'UTF-8');
91+
92+
$errors = libxml_get_errors();
93+
$this->assertEquals(1, count($errors));
94+
$this->assertEquals("Tag nav invalid\n", $errors[0]->message);
95+
96+
libxml_clear_errors();
97+
libxml_use_internal_errors(false);
98+
}
99+
72100
/**
73101
* @covers Symfony\Component\DomCrawler\Crawler::addXmlContent
74102
*/
@@ -80,6 +108,32 @@ public function testAddXmlContent()
80108
$this->assertEquals('foo', $crawler->filter('div')->attr('class'), '->addXmlContent() adds nodes from an XML string');
81109
}
82110

111+
/**
112+
* @covers Symfony\Component\DomCrawler\Crawler::addXmlContent
113+
*/
114+
public function testAddXmlContentWithErrors()
115+
{
116+
libxml_use_internal_errors(true);
117+
118+
$crawler = new Crawler();
119+
$crawler->addXmlContent(<<<EOF
120+
<!DOCTYPE html>
121+
<html>
122+
<head>
123+
</head>
124+
<body>
125+
<nav><a href="#"><a href="#"></nav>
126+
</body>
127+
</html>
128+
EOF
129+
, 'UTF-8');
130+
131+
$this->assertTrue(count(libxml_get_errors()) > 1);
132+
133+
libxml_clear_errors();
134+
libxml_use_internal_errors(false);
135+
}
136+
83137
/**
84138
* @covers Symfony\Component\DomCrawler\Crawler::addContent
85139
*/

0 commit comments

Comments
 (0)
0