[HtmlSanitizer] Introduce HtmlSanitizer component by tgalopin · Pull Request #44681 · symfony/symfony · GitHub
[go: up one dir, main page]

Skip to content

[HtmlSanitizer] Introduce HtmlSanitizer component #44681

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/Symfony/Component/HtmlSanitizer/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/Tests export-ignore
/phpunit.xml.dist export-ignore
/.gitattributes export-ignore
/.gitignore export-ignore
3 changes: 3 additions & 0 deletions src/Symfony/Component/HtmlSanitizer/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
composer.lock
phpunit.xml
vendor/
7 changes: 7 additions & 0 deletions src/Symfony/Component/HtmlSanitizer/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
CHANGELOG
=========

6.1
---

* Add the component as experimental
131 changes: 131 additions & 0 deletions src/Symfony/Component/HtmlSanitizer/HtmlSanitizer.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace Symfony\Component\HtmlSanitizer;

use Symfony\Component\HtmlSanitizer\Parser\MastermindsParser;
use Symfony\Component\HtmlSanitizer\Parser\ParserInterface;
use Symfony\Component\HtmlSanitizer\Reference\W3CReference;
use Symfony\Component\HtmlSanitizer\TextSanitizer\StringSanitizer;
use Symfony\Component\HtmlSanitizer\Visitor\DomVisitor;

/**
 * Sanitizes HTML strings according to an HtmlSanitizerConfig.
 *
 * The input is truncated to a maximum byte length, checked for UTF-8
 * validity, stripped of NULL bytes, parsed, and then rendered back through a
 * DomVisitor. One visitor is built lazily per context and cached.
 *
 * @author Titouan Galopin <galopintitouan@gmail.com>
 *
 * @experimental
 */
final class HtmlSanitizer implements HtmlSanitizerInterface
{
    private HtmlSanitizerConfig $config;
    private int $maxInputLength;
    private ParserInterface $parser;

    /**
     * DOM visitors already built, indexed by context.
     *
     * @var array<string, DomVisitor>
     */
    private array $domVisitors = [];

    /**
     * @param HtmlSanitizerConfig  $config         Configuration providing the allowed and blocked elements
     * @param int                  $maxInputLength Maximum number of input bytes processed; longer input is truncated
     * @param ParserInterface|null $parser         Parser used to build the DOM; a MastermindsParser is created when null
     */
    public function __construct(HtmlSanitizerConfig $config, int $maxInputLength = 20000, ?ParserInterface $parser = null)
    {
        $this->config = $config;
        $this->maxInputLength = $maxInputLength;
        $this->parser = $parser ?? new MastermindsParser();
    }

    /**
     * Sanitizes the given input using the body context.
     */
    public function sanitize(string $input): string
    {
        return $this->sanitizeWithContext(W3CReference::CONTEXT_BODY, $input);
    }

    /**
     * Sanitizes the given input for the context associated with an element.
     *
     * The lower-cased element name is looked up in W3CReference::CONTEXTS_MAP;
     * elements missing from the map fall back to the body context.
     */
    public function sanitizeFor(string $element, string $input): string
    {
        return $this->sanitizeWithContext(
            W3CReference::CONTEXTS_MAP[StringSanitizer::htmlLower($element)] ?? W3CReference::CONTEXT_BODY,
            $input
        );
    }

    /**
     * Runs the sanitization pipeline for one context.
     *
     * Returns an empty string when the input is not valid UTF-8, when the
     * parser yields a falsy result, or when the visitor yields no node.
     */
    private function sanitizeWithContext(string $context, string $input): string
    {
        // Text context: early return with HTML encoding
        if (W3CReference::CONTEXT_TEXT === $context) {
            return StringSanitizer::encodeHtmlEntities($input);
        }

        // Other context: build a DOM visitor
        $this->domVisitors[$context] ??= $this->createDomVisitorForContext($context);

        // Prevent DOS attack induced by extremely long HTML strings
        if (\strlen($input) > $this->maxInputLength) {
            // substr() cuts on a byte boundary and may split a multi-byte character;
            // drop the dangling partial sequence so that a valid document is not
            // rejected as invalid UTF-8 just because of where the cut happened.
            $input = $this->trimIncompleteUtf8Tail(substr($input, 0, $this->maxInputLength));
        }

        // Only operate on valid UTF-8 strings. This is necessary to prevent cross
        // site scripting issues on Internet Explorer 6. Idea from Drupal (filter_xss).
        if (!$this->isValidUtf8($input)) {
            return '';
        }

        // Remove NULL character
        $input = str_replace(\chr(0), '', $input);

        // Parse as HTML
        $parsed = $this->parser->parse($input);
        if (!$parsed) {
            return '';
        }

        // Visit the DOM tree and render the sanitized nodes
        return $this->domVisitors[$context]->visit($parsed)?->render() ?? '';
    }

    /**
     * Tells whether the given string is empty or valid UTF-8.
     */
    private function isValidUtf8(string $html): bool
    {
        // preg_match() fails silently on strings containing invalid UTF-8.
        return '' === $html || 1 === preg_match('//u', $html);
    }

    /**
     * Removes a trailing UTF-8 lead byte that is followed by fewer
     * continuation bytes than it announces.
     *
     * Complete trailing characters are left untouched. The pattern is
     * byte-oriented (no "u" modifier) because the subject may be invalid UTF-8.
     */
    private function trimIncompleteUtf8Tail(string $input): string
    {
        $trimmed = preg_replace(
            '/(?:[\xC2-\xDF]|[\xE0-\xEF][\x80-\xBF]?|[\xF0-\xF4][\x80-\xBF]{0,2})\z/',
            '',
            $input
        );

        // preg_replace() returns null on failure: keep the input as-is in that case.
        return $trimmed ?? $input;
    }

    /**
     * Builds the DOM visitor for a context.
     *
     * Head context: only configured elements listed in
     * W3CReference::HEAD_ELEMENTS are kept. Any other context: only configured
     * elements NOT listed there are kept. Allowed elements map to their
     * allowed attributes, blocked elements map to false.
     */
    private function createDomVisitorForContext(string $context): DomVisitor
    {
        // An element is kept when its membership in HEAD_ELEMENTS matches the context.
        $wantHeadElements = W3CReference::CONTEXT_HEAD === $context;
        $elementsConfig = [];

        foreach ($this->config->getAllowedElements() as $allowedElement => $allowedAttributes) {
            if (\array_key_exists($allowedElement, W3CReference::HEAD_ELEMENTS) === $wantHeadElements) {
                $elementsConfig[$allowedElement] = $allowedAttributes;
            }
        }

        foreach ($this->config->getBlockedElements() as $blockedElement => $v) {
            if (\array_key_exists($blockedElement, W3CReference::HEAD_ELEMENTS) === $wantHeadElements) {
                $elementsConfig[$blockedElement] = false;
            }
        }

        return new DomVisitor($this->config, $elementsConfig);
    }
}
Loading
0