From 199e405fdd0dc119c462e4650c0864668ca9028c Mon Sep 17 00:00:00 2001 From: Titouan Galopin Date: Wed, 15 Dec 2021 18:10:28 +0100 Subject: [PATCH] [HtmlSanitizer] Introduce HtmlSanitizer component --- .../Component/HtmlSanitizer/.gitattributes | 4 + .../Component/HtmlSanitizer/.gitignore | 3 + .../Component/HtmlSanitizer/CHANGELOG.md | 7 + .../Component/HtmlSanitizer/HtmlSanitizer.php | 131 +++ .../HtmlSanitizer/HtmlSanitizerConfig.php | 486 +++++++++++ .../HtmlSanitizer/HtmlSanitizerInterface.php | 44 + src/Symfony/Component/HtmlSanitizer/LICENSE | 19 + .../Parser/MastermindsParser.php | 31 + .../HtmlSanitizer/Parser/ParserInterface.php | 29 + src/Symfony/Component/HtmlSanitizer/README.md | 115 +++ .../HtmlSanitizer/Reference/W3CReference.php | 400 +++++++++ .../Tests/HtmlSanitizerAllTest.php | 554 +++++++++++++ .../Tests/HtmlSanitizerConfigTest.php | 295 +++++++ .../Tests/HtmlSanitizerCustomTest.php | 428 ++++++++++ .../Tests/Parser/MastermindsParserTest.php | 27 + .../Tests/Reference/W3CReferenceTest.php | 55 ++ .../TextSanitizer/StringSanitizerTest.php | 76 ++ .../Tests/TextSanitizer/UrlSanitizerTest.php | 783 ++++++++++++++++++ .../TextSanitizer/StringSanitizer.php | 82 ++ .../TextSanitizer/UrlSanitizer.php | 136 +++ .../AttributeSanitizerInterface.php | 43 + .../UrlAttributeSanitizer.php | 53 ++ .../HtmlSanitizer/Visitor/DomVisitor.php | 176 ++++ .../HtmlSanitizer/Visitor/Model/Cursor.php | 26 + .../Visitor/Node/BlockedNode.php | 48 ++ .../Visitor/Node/DocumentNode.php | 42 + .../HtmlSanitizer/Visitor/Node/Node.php | 106 +++ .../Visitor/Node/NodeInterface.php | 39 + .../HtmlSanitizer/Visitor/Node/TextNode.php | 41 + .../Component/HtmlSanitizer/composer.json | 31 + .../Component/HtmlSanitizer/phpunit.xml.dist | 30 + 31 files changed, 4340 insertions(+) create mode 100644 src/Symfony/Component/HtmlSanitizer/.gitattributes create mode 100644 src/Symfony/Component/HtmlSanitizer/.gitignore create mode 100644 src/Symfony/Component/HtmlSanitizer/CHANGELOG.md create 
mode 100644 src/Symfony/Component/HtmlSanitizer/HtmlSanitizer.php create mode 100644 src/Symfony/Component/HtmlSanitizer/HtmlSanitizerConfig.php create mode 100644 src/Symfony/Component/HtmlSanitizer/HtmlSanitizerInterface.php create mode 100644 src/Symfony/Component/HtmlSanitizer/LICENSE create mode 100644 src/Symfony/Component/HtmlSanitizer/Parser/MastermindsParser.php create mode 100644 src/Symfony/Component/HtmlSanitizer/Parser/ParserInterface.php create mode 100644 src/Symfony/Component/HtmlSanitizer/README.md create mode 100644 src/Symfony/Component/HtmlSanitizer/Reference/W3CReference.php create mode 100644 src/Symfony/Component/HtmlSanitizer/Tests/HtmlSanitizerAllTest.php create mode 100644 src/Symfony/Component/HtmlSanitizer/Tests/HtmlSanitizerConfigTest.php create mode 100644 src/Symfony/Component/HtmlSanitizer/Tests/HtmlSanitizerCustomTest.php create mode 100644 src/Symfony/Component/HtmlSanitizer/Tests/Parser/MastermindsParserTest.php create mode 100644 src/Symfony/Component/HtmlSanitizer/Tests/Reference/W3CReferenceTest.php create mode 100644 src/Symfony/Component/HtmlSanitizer/Tests/TextSanitizer/StringSanitizerTest.php create mode 100644 src/Symfony/Component/HtmlSanitizer/Tests/TextSanitizer/UrlSanitizerTest.php create mode 100644 src/Symfony/Component/HtmlSanitizer/TextSanitizer/StringSanitizer.php create mode 100644 src/Symfony/Component/HtmlSanitizer/TextSanitizer/UrlSanitizer.php create mode 100644 src/Symfony/Component/HtmlSanitizer/Visitor/AttributeSanitizer/AttributeSanitizerInterface.php create mode 100644 src/Symfony/Component/HtmlSanitizer/Visitor/AttributeSanitizer/UrlAttributeSanitizer.php create mode 100644 src/Symfony/Component/HtmlSanitizer/Visitor/DomVisitor.php create mode 100644 src/Symfony/Component/HtmlSanitizer/Visitor/Model/Cursor.php create mode 100644 src/Symfony/Component/HtmlSanitizer/Visitor/Node/BlockedNode.php create mode 100644 src/Symfony/Component/HtmlSanitizer/Visitor/Node/DocumentNode.php create mode 100644 
src/Symfony/Component/HtmlSanitizer/Visitor/Node/Node.php create mode 100644 src/Symfony/Component/HtmlSanitizer/Visitor/Node/NodeInterface.php create mode 100644 src/Symfony/Component/HtmlSanitizer/Visitor/Node/TextNode.php create mode 100644 src/Symfony/Component/HtmlSanitizer/composer.json create mode 100644 src/Symfony/Component/HtmlSanitizer/phpunit.xml.dist diff --git a/src/Symfony/Component/HtmlSanitizer/.gitattributes b/src/Symfony/Component/HtmlSanitizer/.gitattributes new file mode 100644 index 0000000000000..84c7add058fb5 --- /dev/null +++ b/src/Symfony/Component/HtmlSanitizer/.gitattributes @@ -0,0 +1,4 @@ +/Tests export-ignore +/phpunit.xml.dist export-ignore +/.gitattributes export-ignore +/.gitignore export-ignore diff --git a/src/Symfony/Component/HtmlSanitizer/.gitignore b/src/Symfony/Component/HtmlSanitizer/.gitignore new file mode 100644 index 0000000000000..5414c2c655e72 --- /dev/null +++ b/src/Symfony/Component/HtmlSanitizer/.gitignore @@ -0,0 +1,3 @@ +composer.lock +phpunit.xml +vendor/ diff --git a/src/Symfony/Component/HtmlSanitizer/CHANGELOG.md b/src/Symfony/Component/HtmlSanitizer/CHANGELOG.md new file mode 100644 index 0000000000000..003f90de7ee87 --- /dev/null +++ b/src/Symfony/Component/HtmlSanitizer/CHANGELOG.md @@ -0,0 +1,7 @@ +CHANGELOG +========= + +6.1 +--- + + * Add the component as experimental diff --git a/src/Symfony/Component/HtmlSanitizer/HtmlSanitizer.php b/src/Symfony/Component/HtmlSanitizer/HtmlSanitizer.php new file mode 100644 index 0000000000000..78687d6cc2d45 --- /dev/null +++ b/src/Symfony/Component/HtmlSanitizer/HtmlSanitizer.php @@ -0,0 +1,131 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +namespace Symfony\Component\HtmlSanitizer; + +use Symfony\Component\HtmlSanitizer\Parser\MastermindsParser; +use Symfony\Component\HtmlSanitizer\Parser\ParserInterface; +use Symfony\Component\HtmlSanitizer\Reference\W3CReference; +use Symfony\Component\HtmlSanitizer\TextSanitizer\StringSanitizer; +use Symfony\Component\HtmlSanitizer\Visitor\DomVisitor; + +/** + * @author Titouan Galopin + * + * @experimental + */ +final class HtmlSanitizer implements HtmlSanitizerInterface +{ + private HtmlSanitizerConfig $config; + private int $maxInputLength; + private ParserInterface $parser; + + /** + * @var array + */ + private array $domVisitors = []; + + public function __construct(HtmlSanitizerConfig $config, int $maxInputLength = 20000, ParserInterface $parser = null) + { + $this->config = $config; + $this->maxInputLength = $maxInputLength; + $this->parser = $parser ?? new MastermindsParser(); + } + + public function sanitize(string $input): string + { + return $this->sanitizeWithContext(W3CReference::CONTEXT_BODY, $input); + } + + public function sanitizeFor(string $element, string $input): string + { + return $this->sanitizeWithContext( + W3CReference::CONTEXTS_MAP[StringSanitizer::htmlLower($element)] ?? W3CReference::CONTEXT_BODY, + $input + ); + } + + private function sanitizeWithContext(string $context, string $input): string + { + // Text context: early return with HTML encoding + if (W3CReference::CONTEXT_TEXT === $context) { + return StringSanitizer::encodeHtmlEntities($input); + } + + // Other context: build a DOM visitor + $this->domVisitors[$context] ??= $this->createDomVisitorForContext($context); + + // Prevent DOS attack induced by extremely long HTML strings + if (\strlen($input) > $this->maxInputLength) { + $input = substr($input, 0, $this->maxInputLength); + } + + // Only operate on valid UTF-8 strings. This is necessary to prevent cross + // site scripting issues on Internet Explorer 6. Idea from Drupal (filter_xss). 
+ if (!$this->isValidUtf8($input)) { + return ''; + } + + // Remove NULL character + $input = str_replace(\chr(0), '', $input); + + // Parse as HTML + if (!$parsed = $this->parser->parse($input)) { + return ''; + } + + // Visit the DOM tree and render the sanitized nodes + return $this->domVisitors[$context]->visit($parsed)?->render() ?? ''; + } + + private function isValidUtf8(string $html): bool + { + // preg_match() fails silently on strings containing invalid UTF-8. + return '' === $html || preg_match('//u', $html); + } + + private function createDomVisitorForContext(string $context): DomVisitor + { + $elementsConfig = []; + + // Head: only a few elements are allowed + if (W3CReference::CONTEXT_HEAD === $context) { + foreach ($this->config->getAllowedElements() as $allowedElement => $allowedAttributes) { + if (\array_key_exists($allowedElement, W3CReference::HEAD_ELEMENTS)) { + $elementsConfig[$allowedElement] = $allowedAttributes; + } + } + + foreach ($this->config->getBlockedElements() as $blockedElement => $v) { + if (\array_key_exists($blockedElement, W3CReference::HEAD_ELEMENTS)) { + $elementsConfig[$blockedElement] = false; + } + } + + return new DomVisitor($this->config, $elementsConfig); + } + + // Body: allow any configured element that isn't in <head> + foreach ($this->config->getAllowedElements() as $allowedElement => $allowedAttributes) { + if (!\array_key_exists($allowedElement, W3CReference::HEAD_ELEMENTS)) { + $elementsConfig[$allowedElement] = $allowedAttributes; + } + } + + foreach ($this->config->getBlockedElements() as $blockedElement => $v) { + if (!\array_key_exists($blockedElement, W3CReference::HEAD_ELEMENTS)) { + $elementsConfig[$blockedElement] = false; + } + } + + return new DomVisitor($this->config, $elementsConfig); + } +} diff --git a/src/Symfony/Component/HtmlSanitizer/HtmlSanitizerConfig.php b/src/Symfony/Component/HtmlSanitizer/HtmlSanitizerConfig.php new file mode 100644 index 0000000000000..4782d3dbc5d07 --- /dev/null +++ 
b/src/Symfony/Component/HtmlSanitizer/HtmlSanitizerConfig.php @@ -0,0 +1,486 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\Component\HtmlSanitizer; + +use Symfony\Component\HtmlSanitizer\Reference\W3CReference; +use Symfony\Component\HtmlSanitizer\Visitor\AttributeSanitizer\AttributeSanitizerInterface; + +/** + * @author Titouan Galopin + * + * @experimental + */ +class HtmlSanitizerConfig +{ + /** + * Elements that should be removed but their children should be retained. + * + * @var array + */ + private array $blockedElements = []; + + /** + * Elements that should be retained, with their allowed attributes. + * + * @var array> + */ + private array $allowedElements = []; + + /** + * Attributes that should always be added to certain elements. + * + * @var array> + */ + private array $forcedAttributes = []; + + /** + * Links schemes that should be retained, other being dropped. + * + * @var list + */ + private array $allowedLinkSchemes = ['http', 'https', 'mailto', 'tel']; + + /** + * Links hosts that should be retained (by default, all hosts are allowed). + * + * @var list|null + */ + private ?array $allowedLinkHosts = null; + + /** + * Should the sanitizer allow relative links (by default, they are dropped). + */ + private bool $allowRelativeLinks = false; + + /** + * Image/Audio/Video schemes that should be retained, other being dropped. + * + * @var list + */ + private array $allowedMediaSchemes = ['http', 'https', 'data']; + + /** + * Image/Audio/Video hosts that should be retained (by default, all hosts are allowed). + * + * @var list|null + */ + private ?array $allowedMediaHosts = null; + + /** + * Should the sanitizer allow relative media URL (by default, they are dropped). + */ + private bool $allowRelativeMedias = false; + + /** + * Should the URL in the sanitized document be transformed to HTTPS if they are using HTTP. 
+ */ + private bool $forceHttpsUrls = false; + + /** + * Sanitizers that should be applied to specific attributes in addition to standard sanitization. + * + * @var list + */ + private array $attributeSanitizers; + + public function __construct() + { + $this->attributeSanitizers = [ + new Visitor\AttributeSanitizer\UrlAttributeSanitizer(), + ]; + } + + /** + * Allows all static elements and attributes from the W3C Sanitizer API standard. + * + * All scripts will be removed but the output may still contain other dangerous + * behaviors like CSS injection (click-jacking), CSS expressions, ... + */ + public function allowAllStaticElements(): static + { + $elements = array_merge( + array_keys(W3CReference::HEAD_ELEMENTS), + array_keys(W3CReference::BODY_ELEMENTS) + ); + + $clone = clone $this; + foreach ($elements as $element) { + $clone = $clone->allowElement($element, '*'); + } + + return $clone; + } + + /** + * Allows "safe" elements and attributes. + * + * All scripts will be removed, as well as other dangerous behaviors like CSS injection. + */ + public function allowSafeElements(): static + { + $attributes = []; + foreach (W3CReference::ATTRIBUTES as $attribute => $isSafe) { + if ($isSafe) { + $attributes[] = $attribute; + } + } + + $clone = clone $this; + + foreach (W3CReference::HEAD_ELEMENTS as $element => $isSafe) { + if ($isSafe) { + $clone = $clone->allowElement($element, $attributes); + } + } + + foreach (W3CReference::BODY_ELEMENTS as $element => $isSafe) { + if ($isSafe) { + $clone = $clone->allowElement($element, $attributes); + } + } + + return $clone; + } + + /** + * Allows only a given list of schemes to be used in links href attributes. + * + * All other schemes will be dropped. 
+ * + * @param list $allowLinkSchemes + */ + public function allowLinkSchemes(array $allowLinkSchemes): static + { + $clone = clone $this; + $clone->allowedLinkSchemes = $allowLinkSchemes; + + return $clone; + } + + /** + * Allows only a given list of hosts to be used in links href attributes. + * + * All other hosts will be dropped. By default all hosts are allowed + * ($allowedLinkHosts = null). + * + * @param list|null $allowLinkHosts + */ + public function allowLinkHosts(?array $allowLinkHosts): static + { + $clone = clone $this; + $clone->allowedLinkHosts = $allowLinkHosts; + + return $clone; + } + + /** + * Allows relative URLs to be used in links href attributes. + */ + public function allowRelativeLinks(bool $allowRelativeLinks = true): static + { + $clone = clone $this; + $clone->allowRelativeLinks = $allowRelativeLinks; + + return $clone; + } + + /** + * Allows only a given list of schemes to be used in media source attributes (img, audio, video, ...). + * + * All other schemes will be dropped. + * + * @param list $allowMediaSchemes + */ + public function allowMediaSchemes(array $allowMediaSchemes): static + { + $clone = clone $this; + $clone->allowedMediaSchemes = $allowMediaSchemes; + + return $clone; + } + + /** + * Allows only a given list of hosts to be used in media source attributes (img, audio, video, ...). + * + * All other hosts will be dropped. By default all hosts are allowed + * ($allowMediaHosts = null). + * + * @param list|null $allowMediaHosts + */ + public function allowMediaHosts(?array $allowMediaHosts): static + { + $clone = clone $this; + $clone->allowedMediaHosts = $allowMediaHosts; + + return $clone; + } + + /** + * Allows relative URLs to be used in media source attributes (img, audio, video, ...). 
+ */ + public function allowRelativeMedias(bool $allowRelativeMedias = true): static + { + $clone = clone $this; + $clone->allowRelativeMedias = $allowRelativeMedias; + + return $clone; + } + + /** + * Transforms URLs using the HTTP scheme to use the HTTPS scheme instead. + */ + public function forceHttpsUrls(bool $forceHttpsUrls = true): static + { + $clone = clone $this; + $clone->forceHttpsUrls = $forceHttpsUrls; + + return $clone; + } + + /** + * Configures the given element as allowed. + * + * Allowed elements are elements the sanitizer should retain from the input. + * + * A list of allowed attributes for this element can be passed as a second argument. + * Passing "*" will allow all standard attributes on this element. By default, no + * attributes are allowed on the element. + * + * @param list<string>|string $allowedAttributes + */ + public function allowElement(string $element, array|string $allowedAttributes = []): static + { + $clone = clone $this; + + // Unblock the element if necessary + unset($clone->blockedElements[$element]); + + $clone->allowedElements[$element] = []; + + $attrs = ('*' === $allowedAttributes) ? array_keys(W3CReference::ATTRIBUTES) : (array) $allowedAttributes; + foreach ($attrs as $allowedAttr) { + $clone->allowedElements[$element][$allowedAttr] = true; + } + + return $clone; + } + + /** + * Configures the given element as blocked. + * + * Blocked elements are elements the sanitizer should remove from the input, but retain + * their children. + */ + public function blockElement(string $element): static + { + $clone = clone $this; + + // Disallow the element if necessary + unset($clone->allowedElements[$element]); + + $clone->blockedElements[$element] = true; + + return $clone; + } + + /** + * Configures the given element as dropped. + * + * Dropped elements are elements the sanitizer should remove from the input, including + * their children. 
+ * + * Note: when using an empty configuration, all unknown elements are dropped + * automatically. This method lets you drop elements that were allowed earlier + * in the configuration. + */ + public function dropElement(string $element): static + { + $clone = clone $this; + unset($clone->allowedElements[$element], $clone->blockedElements[$element]); + + return $clone; + } + + /** + * Configures the given attribute as allowed. + * + * Allowed attributes are attributes the sanitizer should retain from the input. + * + * A list of allowed elements for this attribute can be passed as a second argument. + * Passing "*" will allow all currently allowed elements to use this attribute. + * + * @param list<string>|string $allowedElements + */ + public function allowAttribute(string $attribute, array|string $allowedElements): static + { + $clone = clone $this; + $allowedElements = ('*' === $allowedElements) ? array_keys($clone->allowedElements) : (array) $allowedElements; + + // For each configured element ... + foreach ($clone->allowedElements as $element => $attrs) { + if (\in_array($element, $allowedElements, true)) { + // ... if the attribute should be allowed, add it + $clone->allowedElements[$element][$attribute] = true; + } else { + // ... if the attribute should not be allowed, remove it + unset($clone->allowedElements[$element][$attribute]); + } + } + + return $clone; + } + + /** + * Configures the given attribute as dropped. + * + * Dropped attributes are attributes the sanitizer should remove from the input. + * + * A list of elements on which to drop this attribute can be passed as a second argument. + * Passing "*" will drop this attribute from all currently allowed elements. + * + * Note: when using an empty configuration, all unknown attributes are dropped + * automatically. This method lets you drop attributes that were allowed earlier + * in the configuration. 
+ * + * @param list|string $droppedElements + */ + public function dropAttribute(string $attribute, array|string $droppedElements): static + { + $clone = clone $this; + $droppedElements = ('*' === $droppedElements) ? array_keys($clone->allowedElements) : (array) $droppedElements; + + foreach ($droppedElements as $element) { + if (isset($clone->allowedElements[$element][$attribute])) { + unset($clone->allowedElements[$element][$attribute]); + } + } + + return $clone; + } + + /** + * Forcefully set the value of a given attribute on a given element. + * + * The attribute will be created on the nodes if it didn't exist. + */ + public function forceAttribute(string $element, string $attribute, string $value): static + { + $clone = clone $this; + $clone->forcedAttributes[$element][$attribute] = $value; + + return $clone; + } + + /** + * Registers a custom attribute sanitizer. + */ + public function withAttributeSanitizer(AttributeSanitizerInterface $sanitizer): static + { + $clone = clone $this; + $clone->attributeSanitizers[] = $sanitizer; + + return $clone; + } + + /** + * Unregisters a custom attribute sanitizer. 
+ */ + public function withoutAttributeSanitizer(AttributeSanitizerInterface $sanitizer): static + { + $clone = clone $this; + $clone->attributeSanitizers = array_values(array_filter( + $this->attributeSanitizers, + static fn ($current) => $current !== $sanitizer + )); + + return $clone; + } + + /** + * @return array> + */ + public function getAllowedElements(): array + { + return $this->allowedElements; + } + + /** + * @return array + */ + public function getBlockedElements(): array + { + return $this->blockedElements; + } + + /** + * @return array> + */ + public function getForcedAttributes(): array + { + return $this->forcedAttributes; + } + + /** + * @return list + */ + public function getAllowedLinkSchemes(): array + { + return $this->allowedLinkSchemes; + } + + /** + * @return list|null + */ + public function getAllowedLinkHosts(): ?array + { + return $this->allowedLinkHosts; + } + + public function getAllowRelativeLinks(): bool + { + return $this->allowRelativeLinks; + } + + /** + * @return list + */ + public function getAllowedMediaSchemes(): array + { + return $this->allowedMediaSchemes; + } + + /** + * @return list|null + */ + public function getAllowedMediaHosts(): ?array + { + return $this->allowedMediaHosts; + } + + public function getAllowRelativeMedias(): bool + { + return $this->allowRelativeMedias; + } + + public function getForceHttpsUrls(): bool + { + return $this->forceHttpsUrls; + } + + /** + * @return list + */ + public function getAttributeSanitizers(): array + { + return $this->attributeSanitizers; + } +} diff --git a/src/Symfony/Component/HtmlSanitizer/HtmlSanitizerInterface.php b/src/Symfony/Component/HtmlSanitizer/HtmlSanitizerInterface.php new file mode 100644 index 0000000000000..559bcb6a46a98 --- /dev/null +++ b/src/Symfony/Component/HtmlSanitizer/HtmlSanitizerInterface.php @@ -0,0 +1,44 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +namespace Symfony\Component\HtmlSanitizer; + +/** + * Sanitizes an untrusted HTML input for safe insertion into a document's DOM. + * + * This interface is inspired by the W3C Standard Draft about a HTML Sanitizer API + * ({@see https://wicg.github.io/sanitizer-api/}). + * + * @author Titouan Galopin + * + * @experimental + */ +interface HtmlSanitizerInterface +{ + /** + * Sanitizes an untrusted HTML input for a <body> context. + * + * This method is NOT context sensitive: it assumes the returned HTML string + * will be injected in a "body" context, and therefore will drop tags only + * allowed in the "head" element. To sanitize a string for injection + * in the "head" element, use {@see HtmlSanitizerInterface::sanitizeFor()}. + */ + public function sanitize(string $input): string; + + /** + * Sanitizes an untrusted HTML input for a given context. + * + * This method is context sensitive: by providing a parent element name + * (body, head, title, ...), the sanitizer will adapt its rules to only + * allow elements that are valid inside the given parent element. 
+ */ + public function sanitizeFor(string $element, string $input): string; +} diff --git a/src/Symfony/Component/HtmlSanitizer/LICENSE b/src/Symfony/Component/HtmlSanitizer/LICENSE new file mode 100644 index 0000000000000..efb17f98e7dd3 --- /dev/null +++ b/src/Symfony/Component/HtmlSanitizer/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2021 Fabien Potencier + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is furnished +to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/src/Symfony/Component/HtmlSanitizer/Parser/MastermindsParser.php b/src/Symfony/Component/HtmlSanitizer/Parser/MastermindsParser.php new file mode 100644 index 0000000000000..f9752fc04901f --- /dev/null +++ b/src/Symfony/Component/HtmlSanitizer/Parser/MastermindsParser.php @@ -0,0 +1,31 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +namespace Symfony\Component\HtmlSanitizer\Parser; + +use Masterminds\HTML5; + +/** + * @author Titouan Galopin + * + * @experimental + */ +final class MastermindsParser implements ParserInterface +{ + public function __construct(private array $defaultOptions = []) + { + } + + public function parse(string $html): ?\DOMNode + { + return (new HTML5($this->defaultOptions))->loadHTMLFragment($html); + } +} diff --git a/src/Symfony/Component/HtmlSanitizer/Parser/ParserInterface.php b/src/Symfony/Component/HtmlSanitizer/Parser/ParserInterface.php new file mode 100644 index 0000000000000..50d56fad6d3be --- /dev/null +++ b/src/Symfony/Component/HtmlSanitizer/Parser/ParserInterface.php @@ -0,0 +1,29 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\Component\HtmlSanitizer\Parser; + +/** + * Transforms an untrusted HTML input string into a DOM tree. + * + * @author Titouan Galopin + * + * @experimental + */ +interface ParserInterface +{ + /** + * Parse a given string and returns a DOMNode tree. + * + * This method must return null if the string cannot be parsed as HTML. + */ + public function parse(string $html): ?\DOMNode; +} diff --git a/src/Symfony/Component/HtmlSanitizer/README.md b/src/Symfony/Component/HtmlSanitizer/README.md new file mode 100644 index 0000000000000..dba14d9ab8622 --- /dev/null +++ b/src/Symfony/Component/HtmlSanitizer/README.md @@ -0,0 +1,115 @@ +HtmlSanitizer Component +======================= + +The HtmlSanitizer component provides an object-oriented API to sanitize +untrusted HTML input for safe insertion into a document's DOM. 
+ +Usage +----- + +```php +use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig; +use Symfony\Component\HtmlSanitizer\HtmlSanitizer; + +// By default, an element not added to the allowed or blocked elements +// will be dropped, including its children +$config = (new HtmlSanitizerConfig()) + // Allow "safe" elements and attributes. All scripts will be removed + // as well as other dangerous behaviors like CSS injection + ->allowSafeElements() + + // Allow all static elements and attributes from the W3C Sanitizer API + // standard. All scripts will be removed but the output may still contain + // other dangerous behaviors like CSS injection (click-jacking), CSS + // expressions, ... + ->allowAllStaticElements() + + // Allow the "div" element and no attribute can be on it + ->allowElement('div') + + // Allow the "a" element, and the "title" attribute to be on it + ->allowElement('a', ['title']) + + // Allow the "span" element, and any attribute from the Sanitizer API is allowed + // (see https://wicg.github.io/sanitizer-api/#default-configuration) + ->allowElement('span', '*') + + // Block the "section" element: this element will be removed but + // its children will be retained + ->blockElement('section') + + // Drop the "div" element: this element will be removed, including its children + ->dropElement('div') + + // Allow the attribute "title" on the "div" element + ->allowAttribute('title', ['div']) + + // Allow the attribute "data-custom-attr" on all currently allowed elements + ->allowAttribute('data-custom-attr', '*') + + // Drop the "data-custom-attr" attribute from the "div" element: + // this attribute will be removed + ->dropAttribute('data-custom-attr', ['div']) + + // Drop the "data-custom-attr" attribute from all elements: + // this attribute will be removed + ->dropAttribute('data-custom-attr', '*') + + // Forcefully set the value of all "rel" attributes on "a" + // elements to "noopener noreferrer" + ->forceAttribute('a', 'rel', 'noopener 
noreferrer') + + // Transform all HTTP schemes to HTTPS + ->forceHttpsUrls() + + // Configure which schemes are allowed in links (others will be dropped) + ->allowLinkSchemes(['https', 'http', 'mailto']) + + // Configure which hosts are allowed in links (by default all are allowed) + ->allowLinkHosts(['symfony.com', 'example.com']) + + // Allow relative URL in links (by default they are dropped) + ->allowRelativeLinks() + + // Configure which schemes are allowed in img/audio/video/iframe (others will be dropped) + ->allowMediaSchemes(['https', 'http']) + + // Configure which hosts are allowed in img/audio/video/iframe (by default all are allowed) + ->allowMediaHosts(['symfony.com', 'example.com']) + + // Allow relative URL in img/audio/video/iframe (by default they are dropped) + ->allowRelativeMedias() + + // Configure a custom attribute sanitizer to apply custom sanitization logic + // ($attributeSanitizer instance of AttributeSanitizerInterface) + ->withAttributeSanitizer($attributeSanitizer) + + // Unregister a previously registered attribute sanitizer + // ($attributeSanitizer instance of AttributeSanitizerInterface) + ->withoutAttributeSanitizer($attributeSanitizer) +; + +$sanitizer = new HtmlSanitizer($config); + +// Sanitize a given string, using the configuration provided and in the +// "body" context (tags only allowed in <head> will be removed) +$sanitizer->sanitize($userInput); + +// Sanitize the given string for a usage in a <head> tag +$sanitizer->sanitizeFor('head', $userInput); + +// Sanitize the given string for a usage in another tag +$sanitizer->sanitizeFor('title', $userInput); // Will encode as HTML entities +$sanitizer->sanitizeFor('textarea', $userInput); // Will encode as HTML entities +$sanitizer->sanitizeFor('div', $userInput); // Will sanitize as body +$sanitizer->sanitizeFor('section', $userInput); // Will sanitize as body +// ... 
+``` + +Resources +--------- + +* [Contributing](https://symfony.com/doc/current/contributing/index.html) +* [Report issues](https://github.com/symfony/symfony/issues) and + [send Pull Requests](https://github.com/symfony/symfony/pulls) + in the [main Symfony repository](https://github.com/symfony/symfony) diff --git a/src/Symfony/Component/HtmlSanitizer/Reference/W3CReference.php b/src/Symfony/Component/HtmlSanitizer/Reference/W3CReference.php new file mode 100644 index 0000000000000..8668bbf67e2ea --- /dev/null +++ b/src/Symfony/Component/HtmlSanitizer/Reference/W3CReference.php @@ -0,0 +1,400 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\Component\HtmlSanitizer\Reference; + +/** + * Stores reference data from the W3C Sanitizer API standard. + * + * @see https://wicg.github.io/sanitizer-api/#default-configuration + * + * @author Titouan Galopin + * + * @internal + */ +final class W3CReference +{ + /** + * Sanitizer supported contexts. + * + * A parent element name can be passed as an argument to {@see HtmlSanitizer::sanitizeFor()}. + * When doing so, depending on the given context, different elements will be allowed. + */ + public const CONTEXT_HEAD = 'head'; + public const CONTEXT_BODY = 'body'; + public const CONTEXT_TEXT = 'text'; + + // Which context to apply depending on the passed parent element name + public const CONTEXTS_MAP = [ + 'head' => self::CONTEXT_HEAD, + 'textarea' => self::CONTEXT_TEXT, + 'title' => self::CONTEXT_TEXT, + ]; + + /** + * Elements allowed by the Sanitizer standard in <head> as keys, including whether + * they are safe or not as values (safe meaning no global display/audio/video impact). 
+ */ + public const HEAD_ELEMENTS = [ + 'head' => true, + 'link' => true, + 'meta' => true, + 'style' => false, + 'title' => true, + ]; + + /** + * Elements allowed by the Sanitizer standard in <body> as keys, including whether + * they are safe or not as values (safe meaning no global display/audio/video impact). + */ + public const BODY_ELEMENTS = [ + 'a' => true, + 'abbr' => true, + 'acronym' => true, + 'address' => true, + 'area' => true, + 'article' => true, + 'aside' => true, + 'audio' => true, + 'b' => true, + 'basefont' => true, + 'bdi' => true, + 'bdo' => true, + 'bgsound' => false, + 'big' => true, + 'blockquote' => true, + 'body' => true, + 'br' => true, + 'button' => true, + 'canvas' => true, + 'caption' => true, + 'center' => true, + 'cite' => true, + 'code' => true, + 'col' => true, + 'colgroup' => true, + 'command' => true, + 'data' => true, + 'datalist' => true, + 'dd' => true, + 'del' => true, + 'details' => true, + 'dfn' => true, + 'dialog' => true, + 'dir' => true, + 'div' => true, + 'dl' => true, + 'dt' => true, + 'em' => true, + 'fieldset' => true, + 'figcaption' => true, + 'figure' => true, + 'font' => true, + 'footer' => true, + 'form' => false, + 'h1' => true, + 'h2' => true, + 'h3' => true, + 'h4' => true, + 'h5' => true, + 'h6' => true, + 'header' => true, + 'hgroup' => true, + 'hr' => true, + 'html' => true, + 'i' => true, + 'image' => true, + 'img' => true, + 'input' => false, + 'ins' => true, + 'kbd' => true, + 'keygen' => true, + 'label' => true, + 'layer' => true, + 'legend' => true, + 'li' => true, + 'listing' => true, + 'main' => true, + 'map' => true, + 'mark' => true, + 'marquee' => true, + 'menu' => true, + 'meter' => true, + 'nav' => true, + 'nobr' => true, + 'ol' => true, + 'optgroup' => true, + 'option' => true, + 'output' => true, + 'p' => true, + 'picture' => true, + 'plaintext' => true, + 'popup' => true, + 'portal' => true, + 'pre' => true, + 'progress' => true, + 'q' => true, + 'rb' => true, + 'rp' => true, + 'rt' => true, + 
'rtc' => true, + 'ruby' => true, + 's' => true, + 'samp' => true, + 'section' => true, + 'select' => false, + 'selectmenu' => false, + 'slot' => true, + 'small' => true, + 'source' => true, + 'span' => true, + 'strike' => true, + 'strong' => true, + 'sub' => true, + 'summary' => true, + 'sup' => true, + 'table' => true, + 'tbody' => true, + 'td' => true, + 'template' => true, + 'textarea' => false, + 'tfoot' => true, + 'th' => true, + 'thead' => true, + 'time' => true, + 'tr' => true, + 'track' => true, + 'tt' => true, + 'u' => true, + 'ul' => true, + 'var' => true, + 'video' => true, + 'wbr' => true, + 'xmp' => true, + ]; + + /** + * Attributes allowed by the standard. + */ + public const ATTRIBUTES = [ + 'abbr' => true, + 'accept' => true, + 'accept-charset' => true, + 'accesskey' => true, + 'action' => true, + 'align' => true, + 'alink' => true, + 'allow' => true, + 'allowfullscreen' => true, + 'allowpaymentrequest' => false, + 'alt' => true, + 'anchor' => true, + 'archive' => true, + 'as' => true, + 'async' => false, + 'autocapitalize' => false, + 'autocomplete' => false, + 'autocorrect' => false, + 'autofocus' => false, + 'autopictureinpicture' => false, + 'autoplay' => false, + 'axis' => true, + 'background' => false, + 'behavior' => true, + 'bgcolor' => false, + 'border' => false, + 'bordercolor' => false, + 'capture' => true, + 'cellpadding' => true, + 'cellspacing' => true, + 'challenge' => true, + 'char' => true, + 'charoff' => true, + 'charset' => true, + 'checked' => false, + 'cite' => true, + 'class' => false, + 'classid' => false, + 'clear' => true, + 'code' => true, + 'codebase' => true, + 'codetype' => true, + 'color' => false, + 'cols' => true, + 'colspan' => true, + 'compact' => true, + 'content' => true, + 'contenteditable' => false, + 'controls' => true, + 'controlslist' => true, + 'conversiondestination' => true, + 'coords' => true, + 'crossorigin' => true, + 'csp' => true, + 'data' => true, + 'datetime' => true, + 'declare' => true, + 
'decoding' => true, + 'default' => true, + 'defer' => true, + 'dir' => true, + 'direction' => true, + 'dirname' => true, + 'disabled' => true, + 'disablepictureinpicture' => true, + 'disableremoteplayback' => true, + 'disallowdocumentaccess' => true, + 'download' => true, + 'draggable' => true, + 'elementtiming' => true, + 'enctype' => true, + 'end' => true, + 'enterkeyhint' => true, + 'event' => true, + 'exportparts' => true, + 'face' => true, + 'for' => true, + 'form' => false, + 'formaction' => false, + 'formenctype' => false, + 'formmethod' => false, + 'formnovalidate' => false, + 'formtarget' => false, + 'frame' => false, + 'frameborder' => false, + 'headers' => true, + 'height' => true, + 'hidden' => false, + 'high' => true, + 'href' => true, + 'hreflang' => true, + 'hreftranslate' => true, + 'hspace' => true, + 'http-equiv' => false, + 'id' => true, + 'imagesizes' => true, + 'imagesrcset' => true, + 'importance' => true, + 'impressiondata' => true, + 'impressionexpiry' => true, + 'incremental' => true, + 'inert' => true, + 'inputmode' => true, + 'integrity' => true, + 'invisible' => true, + 'is' => true, + 'ismap' => true, + 'keytype' => true, + 'kind' => true, + 'label' => true, + 'lang' => true, + 'language' => true, + 'latencyhint' => true, + 'leftmargin' => true, + 'link' => true, + 'list' => true, + 'loading' => true, + 'longdesc' => true, + 'loop' => true, + 'low' => true, + 'lowsrc' => true, + 'manifest' => true, + 'marginheight' => true, + 'marginwidth' => true, + 'max' => true, + 'maxlength' => true, + 'mayscript' => true, + 'media' => true, + 'method' => true, + 'min' => true, + 'minlength' => true, + 'multiple' => true, + 'muted' => true, + 'name' => true, + 'nohref' => true, + 'nomodule' => true, + 'nonce' => true, + 'noresize' => true, + 'noshade' => true, + 'novalidate' => true, + 'nowrap' => true, + 'object' => true, + 'open' => true, + 'optimum' => true, + 'part' => true, + 'pattern' => true, + 'ping' => false, + 'placeholder' => true, + 
'playsinline' => true, + 'policy' => true, + 'poster' => true, + 'preload' => true, + 'pseudo' => true, + 'readonly' => true, + 'referrerpolicy' => true, + 'rel' => true, + 'reportingorigin' => true, + 'required' => true, + 'resources' => true, + 'rev' => true, + 'reversed' => true, + 'role' => true, + 'rows' => true, + 'rowspan' => true, + 'rules' => true, + 'sandbox' => true, + 'scheme' => true, + 'scope' => true, + 'scopes' => true, + 'scrollamount' => true, + 'scrolldelay' => true, + 'scrolling' => true, + 'select' => false, + 'selected' => false, + 'shadowroot' => true, + 'shadowrootdelegatesfocus' => true, + 'shape' => true, + 'size' => true, + 'sizes' => true, + 'slot' => true, + 'span' => true, + 'spellcheck' => true, + 'src' => true, + 'srcdoc' => true, + 'srclang' => true, + 'srcset' => true, + 'standby' => true, + 'start' => true, + 'step' => true, + 'style' => false, + 'summary' => true, + 'tabindex' => true, + 'target' => true, + 'text' => true, + 'title' => true, + 'topmargin' => true, + 'translate' => true, + 'truespeed' => true, + 'trusttoken' => true, + 'type' => true, + 'usemap' => true, + 'valign' => true, + 'value' => false, + 'valuetype' => true, + 'version' => true, + 'virtualkeyboardpolicy' => true, + 'vlink' => false, + 'vspace' => true, + 'webkitdirectory' => true, + 'width' => false, + 'wrap' => true, + ]; +} diff --git a/src/Symfony/Component/HtmlSanitizer/Tests/HtmlSanitizerAllTest.php b/src/Symfony/Component/HtmlSanitizer/Tests/HtmlSanitizerAllTest.php new file mode 100644 index 0000000000000..b3040817245c8 --- /dev/null +++ b/src/Symfony/Component/HtmlSanitizer/Tests/HtmlSanitizerAllTest.php @@ -0,0 +1,554 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +namespace Symfony\Component\HtmlSanitizer\Tests; + +use PHPUnit\Framework\TestCase; +use Symfony\Component\HtmlSanitizer\HtmlSanitizer; +use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig; + +class HtmlSanitizerAllTest extends TestCase +{ + private function createSanitizer(): HtmlSanitizer + { + return new HtmlSanitizer( + (new HtmlSanitizerConfig()) + ->allowAllStaticElements() + ->allowLinkHosts(['trusted.com', 'external.com']) + ->allowMediaHosts(['trusted.com', 'external.com']) + ->allowRelativeLinks() + ->allowRelativeMedias() + ->forceHttpsUrls() + ); + } + + /** + * @dataProvider provideSanitizeHead + */ + public function testSanitizeHead(string $input, string $expected) + { + $this->assertSame($expected, $this->createSanitizer()->sanitizeFor('head', $input)); + } + + public function provideSanitizeHead() + { + $cases = [ + // Scripts + [ + '', + '', + ], + + // Normal tags + [ + '', + '', + ], + [ + '', + '', + ], + ]; + + foreach ($cases as $case) { + yield $case[0] => $case; + } + } + + /** + * @dataProvider provideSanitizeBody + */ + public function testSanitizeBody(string $input, string $expected) + { + $this->assertSame($expected, $this->createSanitizer()->sanitize($input)); + } + + public function provideSanitizeBody() + { + $cases = [ + // Text + [ + 'hello world', + 'hello world', + ], + [ + '<hello world>', + '<hello world>', + ], + [ + '< Hello', + ' Hello', + ], + [ + 'Lorem & Ipsum', + 'Lorem & Ipsum', + ], + + // Unknown tag + [ + 'Lorem ipsum', + '', + ], + + // Scripts + [ + '', + '', + ], + [ + 'javascript:/*-->', + 'javascript:/*-->', + ], + [ + 'ipt>alert(1)', + '', + ], + [ + 'ipt>alert(1)', + '', + ], + [ + '', + '', + ], + [ + '
Lorem ipsum dolor sit amet, consectetur adipisicing elit.
', + '
Lorem ipsum dolor sit amet, consectetur adipisicing elit.
', + ], + [ + 'Lorem ipsum dolor sit amet, consectetur adipisicing elit.', + 'Lorem ipsum dolor sit amet, consectetur adipisicing elit.', + ], + [ + '<a href="javascript:evil"/>', + 'a href="javascript:evil"/>', + ], + [ + 'Test', + 'Test', + ], + [ + 'Test', + 'Test', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Test', + 'Test', + ], + [ + '
', + '
', + ], + [ + '', + '', + ], + [ + '<iframe src="javascript:evil"/>', + 'iframe src="javascript:evil"/>', + ], + [ + '<img src="javascript:evil"/>', + 'img src="javascript:evil"/>', + ], + [ + '', + '', + ], + [ + '', + '', + ], + [ + '', + '', + ], + [ + '', + '', + ], + [ + '', + '', + ], + [ + '"\>', + '"\>', + ], + [ + '', + '', + ], + [ + '', + '', + ], + [ + '', + '', + ], + [ + '', + '', + ], + [ + '', + '', + ], + [ + '', + '', + ], + [ + '', + '', + ], + [ + '', + '', + ], + [ + '', + '', + ], + [ + '', + '', + ], + [ + '', + '', + ], + [ + '', + '', + ], + [ + '
', + '
', + ], + [ + '', + '', + ], + [ + '', + '', + ], + [ + '', + '', + ], + [ + '!!', + '!!', + ], + + // Inspired by https://www.youtube.com/watch?v=kz7wmRV9xsU + [ + '<script>alert(\'ok\');</script>', + '<script>alert('ok');</script>', + ], + + // Inspired by https://twitter.com/brutelogic/status/1066333383276593152?s=19 + [ + '">"@x.y', + '">', + ], + + // Styles + [ + '', + '', + ], + [ + '
Lorem ipsum dolor sit amet, consectetur.
', + '
Lorem ipsum dolor sit amet, consectetur.
', + ], + [ + '', + '', + ], + [ + 'Lorem ipsum dolor sit amet, consectetur.', + 'Lorem ipsum dolor sit amet, consectetur.', + ], + + // Comments + [ + 'Lorem ipsum dolor sit amet, consectetur', + 'Lorem ipsum dolor sit amet, consectetur', + ], + [ + 'Lorem ipsum ', + 'Lorem ipsum ', + ], + + // Normal tags + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + '
Lorem ipsum
', + '
Lorem ipsum
', + ], + [ + 'Lorem ipsum
dolor sit amet
consectetur adipisicing.', + 'Lorem ipsum
dolor sit amet
consectetur adipisicing.', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + '
Lorem ipsum
', + '
Lorem ipsum
', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + '
Lorem ipsum
', + '
Lorem ipsum
', + ], + [ + '
Lorem ipsum
', + '
Lorem ipsum
', + ], + [ + '
Lorem ipsum
', + '
Lorem ipsum
', + ], + [ + '
Lorem ipsum dolor sit amet, consectetur adipisicing elit.
', + '
Lorem ipsum dolor sit amet, consectetur adipisicing elit.
', + ], + [ + '
Lorem ipsum
', + '
Lorem ipsum
', + ], + [ + '
Lorem ipsum
', + '
Lorem ipsum
', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + '
Lorem ipsum
', + '
Lorem ipsum
', + ], + [ + '
Lorem ipsum
', + '
Lorem ipsum
', + ], + [ + '

Lorem ipsum

', + '

Lorem ipsum

', + ], + [ + '

Lorem ipsum

', + '

Lorem ipsum

', + ], + [ + '

Lorem ipsum

', + '

Lorem ipsum

', + ], + [ + '

Lorem ipsum

', + '

Lorem ipsum

', + ], + [ + '
Lorem ipsum
', + '
Lorem ipsum
', + ], + [ + '
Lorem ipsum
', + '
Lorem ipsum
', + ], + [ + '
', + '
', + ], + [ + 'Image alternative text', + 'Image alternative text', + ], + [ + 'Image alternative text', + 'Image alternative text', + ], + [ + 'Image alternative text', + 'Image alternative text', + ], + [ + '', + '', + ], + [ + '', + '', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + '
  • Lorem ipsum
  • ', + '
  • Lorem ipsum
  • ', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + '
      Lorem ipsum
    ', + '
      Lorem ipsum
    ', + ], + [ + '

    Lorem ipsum

    ', + '

    Lorem ipsum

    ', + ], + [ + '
    Lorem ipsum
    ', + '
    Lorem ipsum
    ', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + '', + '', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum
    ', + 'Lorem ipsum
    ', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + 'Lorem ipsum', + 'Lorem ipsum', + ], + [ + '
      Lorem ipsum
    ', + '
      Lorem ipsum
    ', + ], + ]; + + foreach ($cases as $case) { + yield $case[0] => $case; + } + } +} diff --git a/src/Symfony/Component/HtmlSanitizer/Tests/HtmlSanitizerConfigTest.php b/src/Symfony/Component/HtmlSanitizer/Tests/HtmlSanitizerConfigTest.php new file mode 100644 index 0000000000000..b98af74d02818 --- /dev/null +++ b/src/Symfony/Component/HtmlSanitizer/Tests/HtmlSanitizerConfigTest.php @@ -0,0 +1,295 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\Component\HtmlSanitizer\Tests; + +use PHPUnit\Framework\TestCase; +use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig; +use Symfony\Component\HtmlSanitizer\Visitor\AttributeSanitizer\AttributeSanitizerInterface; + +class HtmlSanitizerConfigTest extends TestCase +{ + public function testCreateEmpty() + { + $config = new HtmlSanitizerConfig(); + $this->assertSame([], $config->getAllowedElements()); + $this->assertSame([], $config->getBlockedElements()); + $this->assertSame(['http', 'https', 'mailto', 'tel'], $config->getAllowedLinkSchemes()); + $this->assertNull($config->getAllowedLinkHosts()); + $this->assertSame(['http', 'https', 'data'], $config->getAllowedMediaSchemes()); + $this->assertNull($config->getAllowedMediaHosts()); + $this->assertFalse($config->getForceHttpsUrls()); + } + + public function testSimpleOptions() + { + $config = new HtmlSanitizerConfig(); + $this->assertSame(['http', 'https', 'mailto', 'tel'], $config->getAllowedLinkSchemes()); + $this->assertNull($config->getAllowedLinkHosts()); + $this->assertSame(['http', 'https', 'data'], $config->getAllowedMediaSchemes()); + $this->assertNull($config->getAllowedMediaHosts()); + $this->assertFalse($config->getForceHttpsUrls()); + + $config = $config->allowLinkSchemes(['http', 'ftp']); + $this->assertSame(['http', 'ftp'], $config->getAllowedLinkSchemes()); + + $config = $config->allowLinkHosts(['symfony.com', 'example.com']); + 
$this->assertSame(['symfony.com', 'example.com'], $config->getAllowedLinkHosts()); + + $config = $config->allowRelativeLinks(); + $this->assertTrue($config->getAllowRelativeLinks()); + + $config = $config->allowMediaSchemes(['https']); + $this->assertSame(['https'], $config->getAllowedMediaSchemes()); + + $config = $config->allowMediaHosts(['symfony.com']); + $this->assertSame(['symfony.com'], $config->getAllowedMediaHosts()); + + $config = $config->allowRelativeMedias(); + $this->assertTrue($config->getAllowRelativeMedias()); + + $config = $config->forceHttpsUrls(); + $this->assertTrue($config->getForceHttpsUrls()); + } + + public function testAllowElement() + { + $config = new HtmlSanitizerConfig(); + $config = $config->allowElement('div', ['style']); + $this->assertSame(['div' => ['style' => true]], $config->getAllowedElements()); + $this->assertSame([], $config->getBlockedElements()); + } + + public function testAllowElementTwiceOverridesIt() + { + $config = new HtmlSanitizerConfig(); + $config = $config->allowElement('div', ['style']); + $config = $config->allowElement('div', ['width']); + $this->assertSame(['div' => ['width' => true]], $config->getAllowedElements()); + $this->assertSame([], $config->getBlockedElements()); + + // Allowing a blocked element should remove it from blocked + $config = $config->blockElement('div'); + $this->assertSame(['div' => true], $config->getBlockedElements()); + + $config = $config->allowElement('div', ['width']); + $this->assertSame(['div' => ['width' => true]], $config->getAllowedElements()); + $this->assertSame([], $config->getBlockedElements()); + } + + public function testAllowBlockedElementUnblocksIt() + { + $config = new HtmlSanitizerConfig(); + $config = $config->blockElement('div'); + $this->assertSame(['div' => true], $config->getBlockedElements()); + + $config = $config->allowElement('div', ['width']); + $this->assertSame(['div' => ['width' => true]], $config->getAllowedElements()); + $this->assertSame([], 
$config->getBlockedElements()); + } + + public function testAllowElementNoAttributes() + { + $config = new HtmlSanitizerConfig(); + $config = $config->allowElement('div', []); + $this->assertSame(['div' => []], $config->getAllowedElements()); + $this->assertSame([], $config->getBlockedElements()); + } + + public function testAllowElementStandardAttributes() + { + $config = new HtmlSanitizerConfig(); + $config = $config->allowElement('div', '*'); + $this->assertSame(['div'], array_keys($config->getAllowedElements())); + $this->assertCount(211, $config->getAllowedElements()['div']); + $this->assertSame([], $config->getBlockedElements()); + } + + public function testAllowElementStringAttribute() + { + $config = new HtmlSanitizerConfig(); + $config = $config->allowElement('div', 'width'); + $this->assertSame(['div' => ['width' => true]], $config->getAllowedElements()); + $this->assertSame([], $config->getBlockedElements()); + } + + public function testBlockElement() + { + $config = new HtmlSanitizerConfig(); + $config = $config->blockElement('div'); + $this->assertSame(['div' => true], $config->getBlockedElements()); + } + + public function testBlockElementDisallowsIt() + { + $config = new HtmlSanitizerConfig(); + $config = $config->allowElement('div', 'width'); + $this->assertSame(['div' => ['width' => true]], $config->getAllowedElements()); + $this->assertSame([], $config->getBlockedElements()); + + $config = $config->blockElement('div'); + $this->assertSame([], $config->getAllowedElements()); + $this->assertSame(['div' => true], $config->getBlockedElements()); + } + + public function testDropAllowedElement() + { + $config = new HtmlSanitizerConfig(); + $config = $config->allowElement('div', 'width'); + $this->assertSame(['div' => ['width' => true]], $config->getAllowedElements()); + $this->assertSame([], $config->getBlockedElements()); + + $config = $config->dropElement('div'); + $this->assertSame([], $config->getAllowedElements()); + $this->assertSame([], 
$config->getBlockedElements()); + } + + public function testDropBlockedElement() + { + $config = new HtmlSanitizerConfig(); + $config = $config->blockElement('div'); + $this->assertSame([], $config->getAllowedElements()); + $this->assertSame(['div' => true], $config->getBlockedElements()); + + $config = $config->dropElement('div'); + $this->assertSame([], $config->getAllowedElements()); + $this->assertSame([], $config->getBlockedElements()); + } + + public function testAllowAttributeNoElement() + { + $config = new HtmlSanitizerConfig(); + $config = $config->allowAttribute('width', 'div'); + $this->assertSame([], $config->getAllowedElements()); + $this->assertSame([], $config->getBlockedElements()); + } + + public function testAllowAttributeAllowedElement() + { + $config = new HtmlSanitizerConfig(); + $config = $config->allowElement('div'); + $config = $config->allowAttribute('width', 'div'); + $this->assertSame(['div' => ['width' => true]], $config->getAllowedElements()); + $this->assertSame([], $config->getBlockedElements()); + } + + public function testAllowAttributeAllElements() + { + $config = new HtmlSanitizerConfig(); + $config = $config->allowElement('div'); + $config = $config->allowElement('section'); + $config = $config->allowAttribute('width', '*'); + $this->assertSame(['div' => ['width' => true], 'section' => ['width' => true]], $config->getAllowedElements()); + $this->assertSame([], $config->getBlockedElements()); + } + + public function testAllowAttributeElementsArray() + { + $config = new HtmlSanitizerConfig(); + $config = $config->allowElement('div'); + $config = $config->allowElement('section'); + $config = $config->allowAttribute('width', ['section']); + $this->assertSame(['div' => [], 'section' => ['width' => true]], $config->getAllowedElements()); + $this->assertSame([], $config->getBlockedElements()); + } + + public function testAllowAttributeElementsString() + { + $config = new HtmlSanitizerConfig(); + $config = $config->allowElement('div'); + 
$config = $config->allowElement('section'); + $config = $config->allowAttribute('width', 'section'); + $this->assertSame(['div' => [], 'section' => ['width' => true]], $config->getAllowedElements()); + $this->assertSame([], $config->getBlockedElements()); + } + + public function testAllowAttributeOverridesIt() + { + $config = new HtmlSanitizerConfig(); + $config = $config->allowElement('div'); + $config = $config->allowElement('section'); + + $config = $config->allowAttribute('width', 'div'); + $this->assertSame(['div' => ['width' => true], 'section' => []], $config->getAllowedElements()); + $this->assertSame([], $config->getBlockedElements()); + + $config = $config->allowAttribute('width', 'section'); + $this->assertSame(['div' => [], 'section' => ['width' => true]], $config->getAllowedElements()); + $this->assertSame([], $config->getBlockedElements()); + } + + public function testDropAllowedAttributeAllowedElementsArray() + { + $config = new HtmlSanitizerConfig(); + $config = $config->allowElement('div', 'width'); + $config = $config->allowElement('section', 'width'); + $this->assertSame(['div' => ['width' => true], 'section' => ['width' => true]], $config->getAllowedElements()); + $this->assertSame([], $config->getBlockedElements()); + + $config = $config->dropAttribute('width', ['div']); + $this->assertSame(['div' => [], 'section' => ['width' => true]], $config->getAllowedElements()); + $this->assertSame([], $config->getBlockedElements()); + } + + public function testDropAllowedAttributeAllowedElementString() + { + $config = new HtmlSanitizerConfig(); + $config = $config->allowElement('div', 'width'); + $config = $config->allowElement('section', 'width'); + $this->assertSame(['div' => ['width' => true], 'section' => ['width' => true]], $config->getAllowedElements()); + $this->assertSame([], $config->getBlockedElements()); + + $config = $config->dropAttribute('width', 'section'); + $this->assertSame(['div' => ['width' => true], 'section' => []], 
$config->getAllowedElements()); + $this->assertSame([], $config->getBlockedElements()); + } + + public function testDropAllowedAttributeAllElements() + { + $config = new HtmlSanitizerConfig(); + $config = $config->allowElement('div', 'width'); + $config = $config->allowElement('section', 'width'); + $this->assertSame(['div' => ['width' => true], 'section' => ['width' => true]], $config->getAllowedElements()); + $this->assertSame([], $config->getBlockedElements()); + + $config = $config->dropAttribute('width', '*'); + $this->assertSame(['div' => [], 'section' => []], $config->getAllowedElements()); + $this->assertSame([], $config->getBlockedElements()); + } + + public function testWithWithoutAttributeSanitizer() + { + $config = new HtmlSanitizerConfig(); + + $sanitizer = new class() implements AttributeSanitizerInterface { + public function getSupportedElements(): ?array + { + return null; + } + + public function getSupportedAttributes(): ?array + { + return null; + } + + public function sanitizeAttribute(string $element, string $attribute, string $value, HtmlSanitizerConfig $config): ?string + { + return ''; + } + }; + + $config = $config->withAttributeSanitizer($sanitizer); + $this->assertContains($sanitizer, $config->getAttributeSanitizers()); + + $config = $config->withoutAttributeSanitizer($sanitizer); + $this->assertNotContains($sanitizer, $config->getAttributeSanitizers()); + } +} diff --git a/src/Symfony/Component/HtmlSanitizer/Tests/HtmlSanitizerCustomTest.php b/src/Symfony/Component/HtmlSanitizer/Tests/HtmlSanitizerCustomTest.php new file mode 100644 index 0000000000000..f44c62414f4f4 --- /dev/null +++ b/src/Symfony/Component/HtmlSanitizer/Tests/HtmlSanitizerCustomTest.php @@ -0,0 +1,428 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */ + +namespace Symfony\Component\HtmlSanitizer\Tests; + +use PHPUnit\Framework\TestCase; +use Symfony\Component\HtmlSanitizer\HtmlSanitizer; +use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig; +use Symfony\Component\HtmlSanitizer\Visitor\AttributeSanitizer\AttributeSanitizerInterface; + +class HtmlSanitizerCustomTest extends TestCase +{ + public function testSanitizeForHead() + { + $config = (new HtmlSanitizerConfig()) + ->allowElement('div') + ; + + $this->assertSame( + ' world', + (new HtmlSanitizer($config))->sanitizeFor('head', '
    Hello
    world') + ); + } + + public function testSanitizeForTextarea() + { + $config = (new HtmlSanitizerConfig()) + ->allowElement('div') + ; + + $this->assertSame( + '<div style="width: 100px">Hello</div> world', + (new HtmlSanitizer($config))->sanitizeFor('textarea', '
    Hello
    world') + ); + } + + public function testSanitizeForTitle() + { + $config = (new HtmlSanitizerConfig()) + ->allowElement('div') + ; + + $this->assertSame( + '<div style="width: 100px">Hello</div> world', + (new HtmlSanitizer($config))->sanitizeFor('title', '
    Hello
    world') + ); + } + + public function testSanitizeDeepNestedString() + { + $config = (new HtmlSanitizerConfig()) + ->allowElement('div') + ; + + $this->assertNotEmpty($this->sanitize($config, str_repeat('
    T', 10000))); + } + + public function testSanitizeNullByte() + { + $this->assertSame('Null byte', $this->sanitize(new HtmlSanitizerConfig(), "Null byte\0")); + $this->assertSame('Null byte', $this->sanitize(new HtmlSanitizerConfig(), 'Null byte�')); + } + + public function testSanitizeDefaultBody() + { + $config = (new HtmlSanitizerConfig()) + ->allowElement('div') + ; + + $this->assertSame( + '
    Hello
    world', + (new HtmlSanitizer($config))->sanitize('
    Hello
    world') + ); + } + + public function testAllowElement() + { + $config = (new HtmlSanitizerConfig()) + ->allowElement('div') + ; + + $this->assertSame( + '
    Hello
    world', + $this->sanitize($config, '
    Hello
    world') + ); + + $this->assertSame( + ' world', + $this->sanitize($config, 'Hello world') + ); + } + + public function testAllowElementWithAttribute() + { + $config = (new HtmlSanitizerConfig()) + ->allowElement('div', ['style']) + ; + + $this->assertSame( + '
    Hello
    world', + $this->sanitize($config, '
    Hello
    world') + ); + + $this->assertSame( + ' world', + $this->sanitize($config, 'Hello world') + ); + } + + public function testBlockElement() + { + $config = (new HtmlSanitizerConfig()) + ->blockElement('div') + ; + + $this->assertSame( + 'Hello world', + $this->sanitize($config, '
    Hello
    world') + ); + + $this->assertSame( + ' world', + $this->sanitize($config, 'Hello world') + ); + } + + public function testDropElement() + { + $config = (new HtmlSanitizerConfig()) + ->blockElement('div') + ->dropElement('div') + ; + + $this->assertSame( + ' world', + $this->sanitize($config, '
    Hello
    world') + ); + + $this->assertSame( + ' world', + $this->sanitize($config, 'Hello world') + ); + } + + public function testAllowAttributeOnElement() + { + $config = (new HtmlSanitizerConfig()) + ->allowElement('div') + ->allowElement('span') + ->allowAttribute('style', ['div']) + ; + + $this->assertSame( + '
    Hello
    world', + $this->sanitize($config, '
    Hello
    world') + ); + + $this->assertSame( + 'Hello world', + $this->sanitize($config, 'Hello world') + ); + } + + public function testAllowAttributeEverywhere() + { + $config = (new HtmlSanitizerConfig()) + ->allowElement('div') + ->allowElement('span') + ->allowAttribute('style', '*') + ; + + $this->assertSame( + '
    Hello
    world', + $this->sanitize($config, '
    Hello
    world') + ); + + $this->assertSame( + 'Hello world', + $this->sanitize($config, 'Hello world') + ); + } + + public function testDropAttributeOnElement() + { + $config = (new HtmlSanitizerConfig()) + ->allowElement('div') + ->allowElement('span') + ->allowAttribute('style', '*') + ->dropAttribute('style', 'span') + ; + + $this->assertSame( + '
    Hello
    world', + $this->sanitize($config, '
    Hello
    world') + ); + + $this->assertSame( + 'Hello world', + $this->sanitize($config, 'Hello world') + ); + } + + public function testDropAttributeEverywhere() + { + $config = (new HtmlSanitizerConfig()) + ->allowElement('div') + ->allowElement('span') + ->allowAttribute('style', '*') + ->dropAttribute('style', '*') + ; + + $this->assertSame( + '
    Hello
    world', + $this->sanitize($config, '
    Hello
    world') + ); + + $this->assertSame( + 'Hello world', + $this->sanitize($config, 'Hello world') + ); + } + + public function testForceAttribute() + { + $config = (new HtmlSanitizerConfig()) + ->allowElement('div') + ->allowElement('a', ['href']) + ->forceAttribute('a', 'rel', 'noopener noreferrer') + ; + + $this->assertSame( + 'Hello world', + $this->sanitize($config, 'Hello world') + ); + + $this->assertSame( + 'Hello world', + $this->sanitize($config, 'Hello world') + ); + + $this->assertSame( + '
    Hello
    world', + $this->sanitize($config, '
    Hello
    world') + ); + } + + public function testForceHttps() + { + $config = (new HtmlSanitizerConfig()) + ->allowElement('a', ['href']) + ->forceHttpsUrls() + ; + + $this->assertSame( + 'Hello world', + $this->sanitize($config, 'Hello world') + ); + + $this->assertSame( + 'Hello world', + $this->sanitize($config, 'Hello world') + ); + + $this->assertSame( + 'Hello world', + $this->sanitize($config, 'Hello world') + ); + } + + public function testAllowLinksSchemes() + { + $config = (new HtmlSanitizerConfig()) + ->allowElement('a', ['href']) + ->allowLinkSchemes(['https']) + ; + + $this->assertSame( + 'Hello world', + $this->sanitize($config, 'Hello world') + ); + + $this->assertSame( + 'Hello world', + $this->sanitize($config, 'Hello world') + ); + } + + public function testAllowLinksHosts() + { + $config = (new HtmlSanitizerConfig()) + ->allowElement('a', ['href']) + ->allowLinkHosts(['trusted.com']) + ; + + $this->assertSame( + 'Hello world', + $this->sanitize($config, 'Hello world') + ); + + $this->assertSame( + 'Hello world', + $this->sanitize($config, 'Hello world') + ); + } + + public function testAllowLinksRelative() + { + $config = (new HtmlSanitizerConfig()) + ->allowElement('a', ['href']) + ->allowRelativeLinks() + ; + + $this->assertSame( + 'Hello world', + $this->sanitize($config, 'Hello world') + ); + + $this->assertSame( + 'Hello world', + $this->sanitize($config, 'Hello world') + ); + } + + public function testAllowMediaSchemes() + { + $config = (new HtmlSanitizerConfig()) + ->allowElement('img', ['src']) + ->allowMediaSchemes(['https']) + ; + + $this->assertSame( + '', + $this->sanitize($config, '') + ); + + $this->assertSame( + '', + $this->sanitize($config, '') + ); + + $this->assertSame( + '', + $this->sanitize($config, '') + ); + } + + public function testAllowMediasHosts() + { + $config = (new HtmlSanitizerConfig()) + ->allowElement('img', ['src']) + ->allowMediaHosts(['trusted.com']) + ; + + $this->assertSame( + '', + $this->sanitize($config, '') 
+ ); + + $this->assertSame( + '', + $this->sanitize($config, '') + ); + + $this->assertSame( + '', + $this->sanitize($config, '') + ); + } + + public function testAllowMediasRelative() + { + $config = (new HtmlSanitizerConfig()) + ->allowElement('img', ['src']) + ->allowRelativeMedias() + ; + + $this->assertSame( + '', + $this->sanitize($config, '') + ); + + $this->assertSame( + '', + $this->sanitize($config, '') + ); + } + + public function testCustomAttributeSanitizer() + { + $config = (new HtmlSanitizerConfig()) + ->allowElement('div', ['data-attr']) + ->withAttributeSanitizer(new class() implements AttributeSanitizerInterface { + public function getSupportedElements(): ?array + { + return ['div']; + } + + public function getSupportedAttributes(): ?array + { + return ['data-attr']; + } + + public function sanitizeAttribute(string $element, string $attribute, string $value, HtmlSanitizerConfig $config): ?string + { + return 'new value'; + } + }) + ; + + $this->assertSame( + '
    Hello world
    ', + $this->sanitize($config, '
    Hello world
    ') + ); + } + + private function sanitize(HtmlSanitizerConfig $config, string $input): string + { + return (new HtmlSanitizer($config))->sanitize($input); + } +} diff --git a/src/Symfony/Component/HtmlSanitizer/Tests/Parser/MastermindsParserTest.php b/src/Symfony/Component/HtmlSanitizer/Tests/Parser/MastermindsParserTest.php new file mode 100644 index 0000000000000..a013d44ca9ed5 --- /dev/null +++ b/src/Symfony/Component/HtmlSanitizer/Tests/Parser/MastermindsParserTest.php @@ -0,0 +1,27 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\Component\HtmlSanitizer\Tests\Parser; + +use PHPUnit\Framework\TestCase; +use Symfony\Component\HtmlSanitizer\Parser\MastermindsParser; + +class MastermindsParserTest extends TestCase +{ + public function testParseValid() + { + $node = (new MastermindsParser())->parse('
    '); + $this->assertInstanceOf(\DOMNode::class, $node); + $this->assertSame('#document-fragment', $node->nodeName); + $this->assertCount(1, $node->childNodes); + $this->assertSame('div', $node->childNodes->item(0)->nodeName); + } +} diff --git a/src/Symfony/Component/HtmlSanitizer/Tests/Reference/W3CReferenceTest.php b/src/Symfony/Component/HtmlSanitizer/Tests/Reference/W3CReferenceTest.php new file mode 100644 index 0000000000000..9749b851e7f6b --- /dev/null +++ b/src/Symfony/Component/HtmlSanitizer/Tests/Reference/W3CReferenceTest.php @@ -0,0 +1,55 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\Component\HtmlSanitizer\Tests\Reference; + +use PHPUnit\Framework\TestCase; +use Symfony\Component\HtmlSanitizer\Reference\W3CReference; + +/** + * Check that the W3CReference class is up to date with the standard resources. + * + * @see https://github.com/WICG/sanitizer-api/blob/main/resources + */ +class W3CReferenceTest extends TestCase +{ + private const STANDARD_RESOURCES = [ + 'elements' => 'https://raw.githubusercontent.com/WICG/sanitizer-api/main/resources/baseline-element-allow-list.json', + 'attributes' => 'https://raw.githubusercontent.com/WICG/sanitizer-api/main/resources/baseline-attribute-allow-list.json', + ]; + + public function testElements() + { + if (!\in_array('https', stream_get_wrappers(), true)) { + $this->markTestSkipped('"https" stream wrapper is not enabled.'); + } + + $referenceElements = array_values(array_merge(array_keys(W3CReference::HEAD_ELEMENTS), array_keys(W3CReference::BODY_ELEMENTS))); + sort($referenceElements); + + $this->assertSame( + json_decode(file_get_contents(self::STANDARD_RESOURCES['elements']), true, 512, \JSON_THROW_ON_ERROR), + $referenceElements + ); + } + + public function testAttributes() + { + if (!\in_array('https', stream_get_wrappers(), true)) { + $this->markTestSkipped('"https" stream wrapper is 
not enabled.'); + } + + $this->assertSame( + json_decode(file_get_contents(self::STANDARD_RESOURCES['attributes']), true, 512, \JSON_THROW_ON_ERROR), + array_keys(W3CReference::ATTRIBUTES) + ); + } +} diff --git a/src/Symfony/Component/HtmlSanitizer/Tests/TextSanitizer/StringSanitizerTest.php b/src/Symfony/Component/HtmlSanitizer/Tests/TextSanitizer/StringSanitizerTest.php new file mode 100644 index 0000000000000..a8149f2df3e95 --- /dev/null +++ b/src/Symfony/Component/HtmlSanitizer/Tests/TextSanitizer/StringSanitizerTest.php @@ -0,0 +1,76 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\Component\HtmlSanitizer\Tests\TextSanitizer; + +use PHPUnit\Framework\TestCase; +use Symfony\Component\HtmlSanitizer\TextSanitizer\StringSanitizer; + +class StringSanitizerTest extends TestCase +{ + public function provideHtmlLower() + { + $cases = [ + 'exampleAttr' => 'exampleattr', + 'aTTrΔ' => 'attrΔ', + 'data-attr' => 'data-attr', + 'test with space' => 'test with space', + ]; + + foreach ($cases as $input => $expected) { + yield $input => [$input, $expected]; + } + } + + /** + * @dataProvider provideHtmlLower + */ + public function testHtmlLower(string $input, string $expected) + { + $this->assertSame($expected, StringSanitizer::htmlLower($input)); + } + + public function provideEncodeHtmlEntites() + { + $cases = [ + '' => '', + '"' => '"', + '\'' => ''', + '&' => '&', + '<' => '<', + '>' => '>', + '<' => '&lt;', + '>' => '&gt;', + '+' => '+', + '=' => '=', + '@' => '@', + '`' => '`', + '<' => '<', + '>' => '>', + '+' => '+', + '=' => '=', + '@' => '@', + '`' => '`', + ]; + + foreach ($cases as $input => $expected) { + yield $input => [$input, $expected]; + } + } + + /** + * @dataProvider provideEncodeHtmlEntites + */ + public function testEncodeHtmlEntites(string $input, string $expected) + { + $this->assertSame($expected, 
StringSanitizer::encodeHtmlEntities($input)); + } +} diff --git a/src/Symfony/Component/HtmlSanitizer/Tests/TextSanitizer/UrlSanitizerTest.php b/src/Symfony/Component/HtmlSanitizer/Tests/TextSanitizer/UrlSanitizerTest.php new file mode 100644 index 0000000000000..3216244e9ed10 --- /dev/null +++ b/src/Symfony/Component/HtmlSanitizer/Tests/TextSanitizer/UrlSanitizerTest.php @@ -0,0 +1,783 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\Component\HtmlSanitizer\Tests\TextSanitizer; + +use PHPUnit\Framework\TestCase; +use Symfony\Component\HtmlSanitizer\TextSanitizer\UrlSanitizer; + +class UrlSanitizerTest extends TestCase +{ + /** + * @dataProvider provideSanitize + */ + public function testSanitize(?string $input, ?array $allowedSchemes, ?array $allowedHosts, bool $forceHttps, bool $allowRelative, ?string $expected) + { + $this->assertSame($expected, UrlSanitizer::sanitize($input, $allowedSchemes, $forceHttps, $allowedHosts, $allowRelative)); + } + + public function provideSanitize() + { + // Simple accepted cases + yield [ + 'input' => '', + 'allowedSchemes' => ['https'], + 'allowedHosts' => null, + 'forceHttps' => false, + 'allowRelative' => false, + 'output' => null, + ]; + + yield [ + 'input' => ':invalid', + 'allowedSchemes' => ['https'], + 'allowedHosts' => null, + 'forceHttps' => false, + 'allowRelative' => false, + 'output' => null, + ]; + + yield [ + 'input' => 'https://trusted.com/link.php', + 'allowedSchemes' => ['https'], + 'allowedHosts' => null, + 'forceHttps' => false, + 'allowRelative' => false, + 'output' => 'https://trusted.com/link.php', + ]; + + yield [ + 'input' => 'https://trusted.com/link.php', + 'allowedSchemes' => ['https'], + 'allowedHosts' => ['trusted.com'], + 'forceHttps' => false, + 'allowRelative' => false, + 'output' => 'https://trusted.com/link.php', + ]; + + yield [ + 'input' => 'http://trusted.com/link.php', + 
'allowedSchemes' => ['http'], + 'allowedHosts' => ['trusted.com'], + 'forceHttps' => false, + 'allowRelative' => false, + 'output' => 'http://trusted.com/link.php', + ]; + + yield [ + 'input' => 'data:text/plain;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7', + 'allowedSchemes' => ['data'], + 'allowedHosts' => null, + 'forceHttps' => false, + 'allowRelative' => false, + 'output' => 'data:text/plain;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7', + ]; + + // Simple filtered cases + yield [ + 'input' => 'ws://trusted.com/link.php', + 'allowedSchemes' => ['http'], + 'allowedHosts' => null, + 'forceHttps' => false, + 'allowRelative' => false, + 'output' => null, + ]; + + yield [ + 'input' => 'http:link.php', + 'allowedSchemes' => ['http'], + 'allowedHosts' => null, + 'forceHttps' => false, + 'allowRelative' => false, + 'output' => null, + ]; + + yield [ + 'input' => 'http:link.php', + 'allowedSchemes' => ['http'], + 'allowedHosts' => null, + 'forceHttps' => false, + 'allowRelative' => true, + 'output' => 'http:link.php', + ]; + + yield [ + 'input' => 'ws://trusted.com/link.php', + 'allowedSchemes' => ['http'], + 'allowedHosts' => ['trusted.com'], + 'forceHttps' => false, + 'allowRelative' => false, + 'output' => null, + ]; + + yield [ + 'input' => 'https://trusted.com/link.php', + 'allowedSchemes' => ['http'], + 'allowedHosts' => null, + 'forceHttps' => false, + 'allowRelative' => false, + 'output' => null, + ]; + + yield [ + 'input' => 'https://untrusted.com/link.php', + 'allowedSchemes' => ['https'], + 'allowedHosts' => ['trusted.com'], + 'forceHttps' => false, + 'allowRelative' => false, + 'output' => null, + ]; + + yield [ + 'input' => 'http://untrusted.com/link.php', + 'allowedSchemes' => ['http'], + 'allowedHosts' => ['trusted.com'], + 'forceHttps' => false, + 'allowRelative' => false, + 'output' => null, + ]; + + yield [ + 'input' => 'data:text/plain;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7', + 
'allowedSchemes' => ['http'], + 'allowedHosts' => null, + 'forceHttps' => false, + 'allowRelative' => false, + 'output' => null, + ]; + + yield [ + 'input' => 'data:text/plain;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7', + 'allowedSchemes' => ['http'], + 'allowedHosts' => ['trusted.com'], + 'forceHttps' => false, + 'allowRelative' => false, + 'output' => null, + ]; + + // Allow null host (data scheme for instance) + yield [ + 'input' => 'data:text/plain;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7', + 'allowedSchemes' => ['http', 'https', 'data'], + 'allowedHosts' => ['trusted.com', null], + 'forceHttps' => false, + 'allowRelative' => false, + 'output' => 'data:text/plain;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7', + ]; + + // Force HTTPS + yield [ + 'input' => 'http://trusted.com/link.php', + 'allowedSchemes' => ['http', 'https'], + 'allowedHosts' => ['trusted.com'], + 'forceHttps' => true, + 'allowRelative' => false, + 'output' => 'https://trusted.com/link.php', + ]; + + yield [ + 'input' => 'https://trusted.com/link.php', + 'allowedSchemes' => ['http', 'https'], + 'allowedHosts' => ['trusted.com'], + 'forceHttps' => true, + 'allowRelative' => false, + 'output' => 'https://trusted.com/link.php', + ]; + + yield [ + 'input' => 'data:text/plain;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7', + 'allowedSchemes' => ['http', 'https', 'data'], + 'allowedHosts' => null, + 'forceHttps' => true, + 'allowRelative' => false, + 'output' => 'data:text/plain;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7', + ]; + + yield [ + 'input' => 'data:text/plain;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7', + 'allowedSchemes' => ['http', 'https', 'data'], + 'allowedHosts' => ['trusted.com', null], + 'forceHttps' => true, + 'allowRelative' => false, + 'output' => 'data:text/plain;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7', + ]; + + // Domain matching + 
yield [ + 'input' => 'https://subdomain.trusted.com/link.php', + 'allowedSchemes' => ['http', 'https'], + 'allowedHosts' => ['trusted.com'], + 'forceHttps' => false, + 'allowRelative' => false, + 'output' => 'https://subdomain.trusted.com/link.php', + ]; + + yield [ + 'input' => 'https://subdomain.trusted.com.untrusted.com/link.php', + 'allowedSchemes' => ['http', 'https'], + 'allowedHosts' => ['trusted.com'], + 'forceHttps' => false, + 'allowRelative' => false, + 'output' => null, + ]; + + yield [ + 'input' => 'https://deep.subdomain.trusted.com/link.php', + 'allowedSchemes' => ['http', 'https'], + 'allowedHosts' => ['trusted.com'], + 'forceHttps' => false, + 'allowRelative' => false, + 'output' => 'https://deep.subdomain.trusted.com/link.php', + ]; + + yield [ + 'input' => 'https://deep.subdomain.trusted.com.untrusted.com/link.php', + 'allowedSchemes' => ['http', 'https'], + 'allowedHosts' => ['trusted.com'], + 'forceHttps' => false, + 'allowRelative' => false, + 'output' => null, + ]; + + // Allow relative + yield [ + 'input' => '/link.php', + 'allowedSchemes' => ['http', 'https'], + 'allowedHosts' => ['trusted.com'], + 'forceHttps' => true, + 'allowRelative' => true, + 'output' => '/link.php', + ]; + + yield [ + 'input' => '/link.php', + 'allowedSchemes' => ['http', 'https'], + 'allowedHosts' => ['trusted.com'], + 'forceHttps' => true, + 'allowRelative' => false, + 'output' => null, + ]; + } + + /** + * @dataProvider provideParse + */ + public function testParse(string $url, ?array $expected) + { + $parsed = UrlSanitizer::parse($url); + + if (null === $expected) { + $this->assertNull($parsed); + } else { + $this->assertIsArray($parsed); + $this->assertArrayHasKey('scheme', $parsed); + $this->assertArrayHasKey('host', $parsed); + $this->assertSame($expected['scheme'], $parsed['scheme']); + $this->assertSame($expected['host'], $parsed['host']); + } + } + + public function provideParse(): iterable + { + $urls = [ + '' => null, + + // Simple tests + 
'https://trusted.com/link.php' => ['scheme' => 'https', 'host' => 'trusted.com'], + 'https://trusted.com/link.php?query=1#foo' => ['scheme' => 'https', 'host' => 'trusted.com'], + 'https://subdomain.trusted.com/link' => ['scheme' => 'https', 'host' => 'subdomain.trusted.com'], + '//trusted.com/link.php' => ['scheme' => null, 'host' => 'trusted.com'], + 'https:trusted.com/link.php' => ['scheme' => 'https', 'host' => null], + 'https://untrusted.com/link' => ['scheme' => 'https', 'host' => 'untrusted.com'], + + // Ensure https://bugs.php.net/bug.php?id=73192 is handled + 'https://untrusted.com:80?@trusted.com/' => ['scheme' => 'https', 'host' => 'untrusted.com'], + 'https://untrusted.com:80#@trusted.com/' => ['scheme' => 'https', 'host' => 'untrusted.com'], + + // Ensure https://medium.com/secjuice/php-ssrf-techniques-9d422cb28d51 is handled + '0://untrusted.com;trusted.com' => null, + '0://untrusted.com:80;trusted.com:80' => null, + '0://untrusted.com:80,trusted.com:80' => null, + + // Data-URI + 'data:text/plain;base64,SSBsb3ZlIFBIUAo' => ['scheme' => 'data', 'host' => null], + 'data:text/plain;base64,SSBsb3ZlIFBIUAo=trusted.com' => ['scheme' => 'data', 'host' => null], + 'data:http://trusted.com' => ['scheme' => 'data', 'host' => null], + 'data://text/plain;base64,SSBsb3ZlIFBIUAo=trusted.com' => ['scheme' => 'data', 'host' => 'text'], + 'data://image/png;base64,SSBsb3ZlIFBIUAo=trusted.com' => ['scheme' => 'data', 'host' => 'image'], + 'data:google.com/plain;base64,SSBsb3ZlIFBIUAo=' => ['scheme' => 'data', 'host' => null], + 'data://google.com/plain;base64,SSBsb3ZlIFBIUAo=' => ['scheme' => 'data', 'host' => 'google.com'], + + // Inspired by https://github.com/punkave/sanitize-html/blob/master/test/test.js + "java\0\t\r\n script:alert(\'foo\')" => null, + 'javascript:alert(\\\'foo\\\')' => ['scheme' => null, 'host' => null], + 'java�script:alert(\\\'foo\\\')' => ['scheme' => null, 'host' => null], + 'javascript:alert(\'foo\')' => null, + + // Extracted from 
https://github.com/web-platform-tests/wpt/blob/master/url/resources/urltestdata.json + "http://example .\norg" => null, + 'http://user:pass@foo:21/bar;par?b#c' => ['scheme' => 'http', 'host' => 'foo'], + 'https://trusted.com:@untrusted.com' => ['scheme' => 'https', 'host' => 'untrusted.com'], + 'https://:@untrusted.com' => ['scheme' => 'https', 'host' => 'untrusted.com'], + 'non-special://test:@untrusted.com/x' => ['scheme' => 'non-special', 'host' => 'untrusted.com'], + 'non-special://:@untrusted.com/x' => ['scheme' => 'non-special', 'host' => 'untrusted.com'], + 'http:foo.com' => ['scheme' => 'http', 'host' => null], + " :foo.com \n" => null, + ' foo.com ' => ['scheme' => null, 'host' => null], + 'a: foo.com' => null, + 'http://f:21/ b ? d # e ' => ['scheme' => 'http', 'host' => 'f'], + 'lolscheme:x x#x x' => ['scheme' => 'lolscheme', 'host' => null], + 'http://f:/c' => ['scheme' => 'http', 'host' => 'f'], + 'http://f:0/c' => ['scheme' => 'http', 'host' => 'f'], + 'http://f:00000000000000/c' => ['scheme' => 'http', 'host' => 'f'], + 'http://f:00000000000000000000080/c' => ['scheme' => 'http', 'host' => 'f'], + "http://f:\n/c" => null, + ' ' => null, + ':foo.com/' => null, + ':foo.com\\' => ['scheme' => null, 'host' => null], + ':' => ['scheme' => null, 'host' => null], + ':a' => ['scheme' => null, 'host' => null], + ':/' => null, + ':\\' => ['scheme' => null, 'host' => null], + ':#' => ['scheme' => null, 'host' => null], + '#' => ['scheme' => null, 'host' => null], + '#/' => ['scheme' => null, 'host' => null], + '#\\' => ['scheme' => null, 'host' => null], + '#;?' => ['scheme' => null, 'host' => null], + '?' 
=> ['scheme' => null, 'host' => null], + '/' => ['scheme' => null, 'host' => null], + ':23' => ['scheme' => null, 'host' => null], + '/:23' => ['scheme' => null, 'host' => null], + '::' => ['scheme' => null, 'host' => null], + '::23' => ['scheme' => null, 'host' => null], + 'foo://' => ['scheme' => 'foo', 'host' => ''], + 'http://a:b@c:29/d' => ['scheme' => 'http', 'host' => 'c'], + 'http::@c:29' => ['scheme' => 'http', 'host' => null], + 'http://&a:foo(b]c@d:2/' => ['scheme' => 'http', 'host' => 'd'], + 'http://::@c@d:2' => null, + 'http://foo.com:b@d/' => ['scheme' => 'http', 'host' => 'd'], + 'http://foo.com/\\@' => ['scheme' => 'http', 'host' => 'foo.com'], + 'http:\\foo.com\\' => ['scheme' => 'http', 'host' => null], + 'http:\\a\\b:c\\d@foo.com\\' => ['scheme' => 'http', 'host' => null], + 'foo:/' => ['scheme' => 'foo', 'host' => null], + 'foo:/bar.com/' => ['scheme' => 'foo', 'host' => null], + 'foo://///////' => ['scheme' => 'foo', 'host' => ''], + 'foo://///////bar.com/' => ['scheme' => 'foo', 'host' => ''], + 'foo:////://///' => ['scheme' => 'foo', 'host' => ''], + 'c:/foo' => ['scheme' => 'c', 'host' => null], + '//foo/bar' => ['scheme' => null, 'host' => 'foo'], + 'http://foo/path;a??e#f#g' => ['scheme' => 'http', 'host' => 'foo'], + 'http://foo/abcd?efgh?ijkl' => ['scheme' => 'http', 'host' => 'foo'], + 'http://foo/abcd#foo?bar' => ['scheme' => 'http', 'host' => 'foo'], + '[61:24:74]:98' => null, + 'http:[61:27]/:foo' => ['scheme' => 'http', 'host' => null], + 'http://[2001::1]' => ['scheme' => 'http', 'host' => '[2001::1]'], + 'http://[::127.0.0.1]' => ['scheme' => 'http', 'host' => '[::127.0.0.1]'], + 'http://[0:0:0:0:0:0:13.1.68.3]' => ['scheme' => 'http', 'host' => '[0:0:0:0:0:0:13.1.68.3]'], + 'http://[2001::1]:80' => ['scheme' => 'http', 'host' => '[2001::1]'], + 'http:/example.com/' => ['scheme' => 'http', 'host' => null], + 'ftp:/example.com/' => ['scheme' => 'ftp', 'host' => null], + 'https:/example.com/' => ['scheme' => 'https', 'host' => 
null], + 'madeupscheme:/example.com/' => ['scheme' => 'madeupscheme', 'host' => null], + 'file:/example.com/' => ['scheme' => 'file', 'host' => null], + 'ftps:/example.com/' => ['scheme' => 'ftps', 'host' => null], + 'gopher:/example.com/' => ['scheme' => 'gopher', 'host' => null], + 'ws:/example.com/' => ['scheme' => 'ws', 'host' => null], + 'wss:/example.com/' => ['scheme' => 'wss', 'host' => null], + 'data:/example.com/' => ['scheme' => 'data', 'host' => null], + 'javascript:/example.com/' => ['scheme' => 'javascript', 'host' => null], + 'mailto:/example.com/' => ['scheme' => 'mailto', 'host' => null], + 'http:example.com/' => ['scheme' => 'http', 'host' => null], + 'ftp:example.com/' => ['scheme' => 'ftp', 'host' => null], + 'https:example.com/' => ['scheme' => 'https', 'host' => null], + 'madeupscheme:example.com/' => ['scheme' => 'madeupscheme', 'host' => null], + 'ftps:example.com/' => ['scheme' => 'ftps', 'host' => null], + 'gopher:example.com/' => ['scheme' => 'gopher', 'host' => null], + 'ws:example.com/' => ['scheme' => 'ws', 'host' => null], + 'wss:example.com/' => ['scheme' => 'wss', 'host' => null], + 'data:example.com/' => ['scheme' => 'data', 'host' => null], + 'javascript:example.com/' => ['scheme' => 'javascript', 'host' => null], + 'mailto:example.com/' => ['scheme' => 'mailto', 'host' => null], + '/a/b/c' => ['scheme' => null, 'host' => null], + '/a/ /c' => ['scheme' => null, 'host' => null], + '/a%2fc' => ['scheme' => null, 'host' => null], + '/a/%2f/c' => ['scheme' => null, 'host' => null], + '#β' => ['scheme' => null, 'host' => null], + 'data:text/html,test#test' => ['scheme' => 'data', 'host' => null], + 'tel:1234567890' => ['scheme' => 'tel', 'host' => null], + 'ssh://example.com/foo/bar.git' => ['scheme' => 'ssh', 'host' => 'example.com'], + "file:c:\foo\bar.html" => null, + ' File:c|////foo\\bar.html' => null, + 'C|/foo/bar' => ['scheme' => null, 'host' => null], + "/C|\foo\bar" => null, + '//C|/foo/bar' => null, + '//server/file' => 
['scheme' => null, 'host' => 'server'], + "\\server\file" => null, + '/\\server/file' => ['scheme' => null, 'host' => null], + 'file:///foo/bar.txt' => ['scheme' => 'file', 'host' => ''], + 'file:///home/me' => ['scheme' => 'file', 'host' => ''], + '//' => ['scheme' => null, 'host' => ''], + '///' => ['scheme' => null, 'host' => ''], + '///test' => ['scheme' => null, 'host' => ''], + 'file://test' => ['scheme' => 'file', 'host' => 'test'], + 'file://localhost' => ['scheme' => 'file', 'host' => 'localhost'], + 'file://localhost/' => ['scheme' => 'file', 'host' => 'localhost'], + 'file://localhost/test' => ['scheme' => 'file', 'host' => 'localhost'], + 'test' => ['scheme' => null, 'host' => null], + 'file:test' => ['scheme' => 'file', 'host' => null], + 'http://example.com/././foo' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/./.foo' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/foo/.' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/foo/./' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/foo/bar/..' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/foo/bar/../' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/foo/..bar' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/foo/bar/../ton' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/foo/bar/../ton/../../a' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/foo/../../..' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/foo/../../../ton' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/foo/%2e' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/foo/%2e%2' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/foo/%2e./%2e%2e/.%2e/%2e.bar' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com////../..' 
=> ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/foo/bar//../..' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/foo/bar//..' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/foo' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/%20foo' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/foo%' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/foo%2' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/foo%2zbar' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/foo%2©zbar' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/foo%41%7a' => ['scheme' => 'http', 'host' => 'example.com'], + "http://example.com/foo \u{0091}%91" => null, + 'http://example.com/foo%00%51' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/(%28:%3A%29)' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/%3A%3a%3C%3c' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/foo bar' => null, + 'http://example.com\\foo\\bar' => null, + 'http://example.com/%7Ffp3%3Eju%3Dduvgw%3Dd' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/@asdf%40' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/你好你好' => ['scheme' => 'http', 'host' => 'example.com'], + 'http://example.com/‥/foo' => ['scheme' => 'http', 'host' => 'example.com'], + "http://example.com/\u{feff}/foo" => ['scheme' => 'http', 'host' => 'example.com'], + "http://example.com\u{002f}\u{202e}\u{002f}\u{0066}\u{006f}\u{006f}\u{002f}\u{202d}\u{002f}\u{0062}\u{0061}\u{0072}\u{0027}\u{0020}" => ['scheme' => 'http', 'host' => 'example.com'], + 'http://www.google.com/foo?bar=baz#' => ['scheme' => 'http', 'host' => 'www.google.com'], + 'http://www.google.com/foo?bar=baz# »' => ['scheme' => 'http', 'host' => 'www.google.com'], + 'data:test# »' => ['scheme' => 
'data', 'host' => null], + 'http://www.google.com' => ['scheme' => 'http', 'host' => 'www.google.com'], + 'http://192.0x00A80001' => ['scheme' => 'http', 'host' => '192.0x00A80001'], + 'http://www/foo%2Ehtml' => ['scheme' => 'http', 'host' => 'www'], + 'http://www/foo/%2E/html' => ['scheme' => 'http', 'host' => 'www'], + 'http://%25DOMAIN:foobar@foodomain.com/' => ['scheme' => 'http', 'host' => 'foodomain.com'], + "http:\\www.google.com\foo" => null, + 'http://foo:80/' => ['scheme' => 'http', 'host' => 'foo'], + 'http://foo:81/' => ['scheme' => 'http', 'host' => 'foo'], + 'httpa://foo:80/' => ['scheme' => 'httpa', 'host' => 'foo'], + 'https://foo:443/' => ['scheme' => 'https', 'host' => 'foo'], + 'https://foo:80/' => ['scheme' => 'https', 'host' => 'foo'], + 'ftp://foo:21/' => ['scheme' => 'ftp', 'host' => 'foo'], + 'ftp://foo:80/' => ['scheme' => 'ftp', 'host' => 'foo'], + 'gopher://foo:70/' => ['scheme' => 'gopher', 'host' => 'foo'], + 'gopher://foo:443/' => ['scheme' => 'gopher', 'host' => 'foo'], + 'ws://foo:80/' => ['scheme' => 'ws', 'host' => 'foo'], + 'ws://foo:81/' => ['scheme' => 'ws', 'host' => 'foo'], + 'ws://foo:443/' => ['scheme' => 'ws', 'host' => 'foo'], + 'ws://foo:815/' => ['scheme' => 'ws', 'host' => 'foo'], + 'wss://foo:80/' => ['scheme' => 'wss', 'host' => 'foo'], + 'wss://foo:81/' => ['scheme' => 'wss', 'host' => 'foo'], + 'wss://foo:443/' => ['scheme' => 'wss', 'host' => 'foo'], + 'wss://foo:815/' => ['scheme' => 'wss', 'host' => 'foo'], + 'http:@www.example.com' => ['scheme' => 'http', 'host' => null], + 'http:/@www.example.com' => ['scheme' => 'http', 'host' => null], + 'http://@www.example.com' => ['scheme' => 'http', 'host' => 'www.example.com'], + 'http:a:b@www.example.com' => ['scheme' => 'http', 'host' => null], + 'http:/a:b@www.example.com' => ['scheme' => 'http', 'host' => null], + 'http://a:b@www.example.com' => ['scheme' => 'http', 'host' => 'www.example.com'], + 'http://@pple.com' => ['scheme' => 'http', 'host' => 'pple.com'], + 
'http::b@www.example.com' => ['scheme' => 'http', 'host' => null], + 'http:/:b@www.example.com' => ['scheme' => 'http', 'host' => null], + 'http://:b@www.example.com' => ['scheme' => 'http', 'host' => 'www.example.com'], + 'http:a:@www.example.com' => ['scheme' => 'http', 'host' => null], + 'http:/a:@www.example.com' => ['scheme' => 'http', 'host' => null], + 'http://a:@www.example.com' => ['scheme' => 'http', 'host' => 'www.example.com'], + 'http://www.@pple.com' => ['scheme' => 'http', 'host' => 'pple.com'], + 'http://:@www.example.com' => ['scheme' => 'http', 'host' => 'www.example.com'], + '/test.txt' => ['scheme' => null, 'host' => null], + '.' => ['scheme' => null, 'host' => null], + '..' => ['scheme' => null, 'host' => null], + 'test.txt' => ['scheme' => null, 'host' => null], + './test.txt' => ['scheme' => null, 'host' => null], + '../test.txt' => ['scheme' => null, 'host' => null], + '../aaa/test.txt' => ['scheme' => null, 'host' => null], + '../../test.txt' => ['scheme' => null, 'host' => null], + '中/test.txt' => ['scheme' => null, 'host' => null], + 'http://www.example2.com' => ['scheme' => 'http', 'host' => 'www.example2.com'], + '//www.example2.com' => ['scheme' => null, 'host' => 'www.example2.com'], + 'file:...' => ['scheme' => 'file', 'host' => null], + 'file:..' 
=> ['scheme' => 'file', 'host' => null], + 'file:a' => ['scheme' => 'file', 'host' => null], + 'http://ExAmPlE.CoM' => ['scheme' => 'http', 'host' => 'ExAmPlE.CoM'], + "http://GOO\u{200b}\u{2060}\u{feff}goo.com" => ['scheme' => 'http', 'host' => "GOO\u{200b}\u{2060}\u{feff}goo.com"], + 'http://www.foo。bar.com' => ['scheme' => 'http', 'host' => 'www.foo。bar.com'], + 'https://x/�?�#�' => ['scheme' => 'https', 'host' => 'x'], + 'http://Go.com' => ['scheme' => 'http', 'host' => 'Go.com'], + 'http://你好你好' => ['scheme' => 'http', 'host' => '你好你好'], + 'https://faß.ExAmPlE/' => ['scheme' => 'https', 'host' => 'faß.ExAmPlE'], + 'sc://faß.ExAmPlE/' => ['scheme' => 'sc', 'host' => 'faß.ExAmPlE'], + 'http://%30%78%63%30%2e%30%32%35%30.01' => ['scheme' => 'http', 'host' => '%30%78%63%30%2e%30%32%35%30.01'], + 'http://%30%78%63%30%2e%30%32%35%30.01%2e' => ['scheme' => 'http', 'host' => '%30%78%63%30%2e%30%32%35%30.01%2e'], + 'http://0Xc0.0250.01' => ['scheme' => 'http', 'host' => '0Xc0.0250.01'], + 'http://./' => ['scheme' => 'http', 'host' => '.'], + 'http://../' => ['scheme' => 'http', 'host' => '..'], + 'http://0..0x300/' => ['scheme' => 'http', 'host' => '0..0x300'], + 'http://foo:💩@example.com/bar' => ['scheme' => 'http', 'host' => 'example.com'], + '#x' => ['scheme' => null, 'host' => null], + 'https://@test@test@example:800/' => null, + 'https://@@@example' => null, + 'http://`{}:`{}@h/`{}?`{}' => ['scheme' => 'http', 'host' => 'h'], + 'http://host/?\'' => ['scheme' => 'http', 'host' => 'host'], + 'notspecial://host/?\'' => ['scheme' => 'notspecial', 'host' => 'host'], + '/some/path' => ['scheme' => null, 'host' => null], + 'i' => ['scheme' => null, 'host' => null], + '../i' => ['scheme' => null, 'host' => null], + '/i' => ['scheme' => null, 'host' => null], + '?i' => ['scheme' => null, 'host' => null], + '#i' => ['scheme' => null, 'host' => null], + 'about:/../' => ['scheme' => 'about', 'host' => null], + 'data:/../' => ['scheme' => 'data', 'host' => null], + 
'javascript:/../' => ['scheme' => 'javascript', 'host' => null], + 'mailto:/../' => ['scheme' => 'mailto', 'host' => null], + 'sc://ñ.test/' => ['scheme' => 'sc', 'host' => 'ñ.test'], + 'sc://!"$&\'()*+,-.;<=>^_`{|}~/' => null, + 'sc://%/' => null, + 'x' => ['scheme' => null, 'host' => null], + 'sc:\\../' => ['scheme' => 'sc', 'host' => null], + 'sc::a@example.net' => ['scheme' => 'sc', 'host' => null], + 'wow:%NBD' => ['scheme' => 'wow', 'host' => null], + 'wow:%1G' => ['scheme' => 'wow', 'host' => null], + 'ftp://%e2%98%83' => ['scheme' => 'ftp', 'host' => '%e2%98%83'], + 'https://%e2%98%83' => ['scheme' => 'https', 'host' => '%e2%98%83'], + 'http://127.0.0.1:10100/relative_import.html' => ['scheme' => 'http', 'host' => '127.0.0.1'], + 'http://facebook.com/?foo=%7B%22abc%22' => ['scheme' => 'http', 'host' => 'facebook.com'], + 'https://localhost:3000/jqueryui@1.2.3' => ['scheme' => 'https', 'host' => 'localhost'], + '?a=b&c=d' => ['scheme' => null, 'host' => null], + '??a=b&c=d' => ['scheme' => null, 'host' => null], + 'http:' => ['scheme' => 'http', 'host' => null], + 'sc:' => ['scheme' => 'sc', 'host' => null], + 'http://foo.bar/baz?qux#fobar' => ['scheme' => 'http', 'host' => 'foo.bar'], + 'http://foo.bar/baz?qux#foo"bar' => ['scheme' => 'http', 'host' => 'foo.bar'], + 'http://foo.bar/baz?qux#foo ['scheme' => 'http', 'host' => 'foo.bar'], + 'http://foo.bar/baz?qux#foo>bar' => ['scheme' => 'http', 'host' => 'foo.bar'], + 'http://foo.bar/baz?qux#foo`bar' => ['scheme' => 'http', 'host' => 'foo.bar'], + 'http://192.168.257' => ['scheme' => 'http', 'host' => '192.168.257'], + 'http://192.168.257.com' => ['scheme' => 'http', 'host' => '192.168.257.com'], + 'http://256' => ['scheme' => 'http', 'host' => '256'], + 'http://256.com' => ['scheme' => 'http', 'host' => '256.com'], + 'http://999999999' => ['scheme' => 'http', 'host' => '999999999'], + 'http://999999999.com' => ['scheme' => 'http', 'host' => '999999999.com'], + 'http://10000000000.com' => ['scheme' => 
'http', 'host' => '10000000000.com'], + 'http://4294967295' => ['scheme' => 'http', 'host' => '4294967295'], + 'http://0xffffffff' => ['scheme' => 'http', 'host' => '0xffffffff'], + 'http://256.256.256.256.256' => ['scheme' => 'http', 'host' => '256.256.256.256.256'], + 'https://0x.0x.0' => ['scheme' => 'https', 'host' => '0x.0x.0'], + 'file:///C%3A/' => ['scheme' => 'file', 'host' => ''], + 'file:///C%7C/' => ['scheme' => 'file', 'host' => ''], + 'pix/submit.gif' => ['scheme' => null, 'host' => null], + '//d:' => ['scheme' => null, 'host' => 'd'], + '//d:/..' => ['scheme' => null, 'host' => 'd'], + 'file:' => ['scheme' => 'file', 'host' => null], + '?x' => ['scheme' => null, 'host' => null], + 'file:?x' => ['scheme' => 'file', 'host' => null], + 'file:#x' => ['scheme' => 'file', 'host' => null], + 'file:\\//' => ['scheme' => 'file', 'host' => null], + 'file:\\\\' => ['scheme' => 'file', 'host' => null], + 'file:\\\\?fox' => ['scheme' => 'file', 'host' => null], + 'file:\\\\#guppy' => ['scheme' => 'file', 'host' => null], + 'file://spider///' => ['scheme' => 'file', 'host' => 'spider'], + 'file:\\localhost//' => ['scheme' => 'file', 'host' => null], + 'file:///localhost//cat' => ['scheme' => 'file', 'host' => ''], + 'file://\\/localhost//cat' => null, + 'file://localhost//a//../..//' => ['scheme' => 'file', 'host' => 'localhost'], + '/////mouse' => ['scheme' => null, 'host' => ''], + '\\//pig' => ['scheme' => null, 'host' => null], + '\\/localhost//pig' => ['scheme' => null, 'host' => null], + '//localhost//pig' => ['scheme' => null, 'host' => 'localhost'], + '/..//localhost//pig' => ['scheme' => null, 'host' => null], + 'file://' => ['scheme' => 'file', 'host' => ''], + '/rooibos' => ['scheme' => null, 'host' => null], + '/?chai' => ['scheme' => null, 'host' => null], + 'C|' => ['scheme' => null, 'host' => null], + 'C|#' => ['scheme' => null, 'host' => null], + 'C|?' 
=> ['scheme' => null, 'host' => null], + 'C|/' => ['scheme' => null, 'host' => null], + "C|\n/" => null, + 'C|\\' => ['scheme' => null, 'host' => null], + 'C' => ['scheme' => null, 'host' => null], + 'C|a' => ['scheme' => null, 'host' => null], + '/c:/foo/bar' => ['scheme' => null, 'host' => null], + '/c|/foo/bar' => ['scheme' => null, 'host' => null], + "file:\c:\foo\bar" => null, + 'file://example.net/C:/' => ['scheme' => 'file', 'host' => 'example.net'], + 'file://1.2.3.4/C:/' => ['scheme' => 'file', 'host' => '1.2.3.4'], + 'file://[1::8]/C:/' => ['scheme' => 'file', 'host' => '[1::8]'], + 'file:/C|/' => ['scheme' => 'file', 'host' => null], + 'file://C|/' => null, + 'file:?q=v' => ['scheme' => 'file', 'host' => null], + 'file:#frag' => ['scheme' => 'file', 'host' => null], + 'http://[1:0::]' => ['scheme' => 'http', 'host' => '[1:0::]'], + 'sc://ñ' => ['scheme' => 'sc', 'host' => 'ñ'], + 'sc://ñ?x' => ['scheme' => 'sc', 'host' => 'ñ'], + 'sc://ñ#x' => ['scheme' => 'sc', 'host' => 'ñ'], + 'sc://?' 
=> ['scheme' => 'sc', 'host' => ''], + 'sc://#' => ['scheme' => 'sc', 'host' => ''], + '////' => ['scheme' => null, 'host' => ''], + '////x/' => ['scheme' => null, 'host' => ''], + 'tftp://foobar.com/someconfig;mode=netascii' => ['scheme' => 'tftp', 'host' => 'foobar.com'], + 'telnet://user:pass@foobar.com:23/' => ['scheme' => 'telnet', 'host' => 'foobar.com'], + 'ut2004://10.10.10.10:7777/Index.ut2' => ['scheme' => 'ut2004', 'host' => '10.10.10.10'], + 'redis://foo:bar@somehost:6379/0?baz=bam&qux=baz' => ['scheme' => 'redis', 'host' => 'somehost'], + 'rsync://foo@host:911/sup' => ['scheme' => 'rsync', 'host' => 'host'], + 'git://github.com/foo/bar.git' => ['scheme' => 'git', 'host' => 'github.com'], + 'irc://myserver.com:6999/channel?passwd' => ['scheme' => 'irc', 'host' => 'myserver.com'], + 'dns://fw.example.org:9999/foo.bar.org?type=TXT' => ['scheme' => 'dns', 'host' => 'fw.example.org'], + 'ldap://localhost:389/ou=People,o=JNDITutorial' => ['scheme' => 'ldap', 'host' => 'localhost'], + 'git+https://github.com/foo/bar' => ['scheme' => 'git+https', 'host' => 'github.com'], + 'urn:ietf:rfc:2648' => ['scheme' => 'urn', 'host' => null], + 'tag:joe@example.org,2001:foo/bar' => ['scheme' => 'tag', 'host' => null], + 'non-special://%E2%80%A0/' => ['scheme' => 'non-special', 'host' => '%E2%80%A0'], + 'non-special://H%4fSt/path' => ['scheme' => 'non-special', 'host' => 'H%4fSt'], + 'non-special://[1:2:0:0:5:0:0:0]/' => ['scheme' => 'non-special', 'host' => '[1:2:0:0:5:0:0:0]'], + 'non-special://[1:2:0:0:0:0:0:3]/' => ['scheme' => 'non-special', 'host' => '[1:2:0:0:0:0:0:3]'], + 'non-special://[1:2::3]:80/' => ['scheme' => 'non-special', 'host' => '[1:2::3]'], + 'blob:https://example.com:443/' => ['scheme' => 'blob', 'host' => null], + 'blob:d3958f5c-0777-0845-9dcf-2cb28783acaf' => ['scheme' => 'blob', 'host' => null], + 'http://0177.0.0.0189' => ['scheme' => 'http', 'host' => '0177.0.0.0189'], + 'http://0x7f.0.0.0x7g' => ['scheme' => 'http', 'host' => '0x7f.0.0.0x7g'], 
+ 'http://0X7F.0.0.0X7G' => ['scheme' => 'http', 'host' => '0X7F.0.0.0X7G'], + 'http://[0:1:0:1:0:1:0:1]' => ['scheme' => 'http', 'host' => '[0:1:0:1:0:1:0:1]'], + 'http://[1:0:1:0:1:0:1:0]' => ['scheme' => 'http', 'host' => '[1:0:1:0:1:0:1:0]'], + 'http://example.org/test?"' => ['scheme' => 'http', 'host' => 'example.org'], + 'http://example.org/test?#' => ['scheme' => 'http', 'host' => 'example.org'], + 'http://example.org/test?<' => ['scheme' => 'http', 'host' => 'example.org'], + 'http://example.org/test?>' => ['scheme' => 'http', 'host' => 'example.org'], + 'http://example.org/test?⌣' => ['scheme' => 'http', 'host' => 'example.org'], + 'http://example.org/test?%23%23' => ['scheme' => 'http', 'host' => 'example.org'], + 'http://example.org/test?%GH' => ['scheme' => 'http', 'host' => 'example.org'], + 'http://example.org/test?a#%EF' => ['scheme' => 'http', 'host' => 'example.org'], + 'http://example.org/test?a#%GH' => ['scheme' => 'http', 'host' => 'example.org'], + 'test-a-colon-slash.html' => ['scheme' => null, 'host' => null], + 'test-a-colon-slash-slash.html' => ['scheme' => null, 'host' => null], + 'test-a-colon-slash-b.html' => ['scheme' => null, 'host' => null], + 'test-a-colon-slash-slash-b.html' => ['scheme' => null, 'host' => null], + 'http://example.org/test?a#bc' => ['scheme' => 'http', 'host' => 'example.org'], + 'http:\\/\\/f:b\\/c' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/f: \\/c' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/f:fifty-two\\/c' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/f:999999\\/c' => ['scheme' => 'http', 'host' => null], + 'non-special:\\/\\/f:999999\\/c' => ['scheme' => 'non-special', 'host' => null], + 'http:\\/\\/f: 21 \\/ b ? 
d # e ' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/[1::2]:3:4' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/2001::1' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/2001::1]' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/2001::1]:80' => ['scheme' => 'http', 'host' => null], + 'file:\\/\\/example:1\\/' => ['scheme' => 'file', 'host' => null], + 'file:\\/\\/example:test\\/' => ['scheme' => 'file', 'host' => null], + 'file:\\/\\/example%\\/' => ['scheme' => 'file', 'host' => null], + 'file:\\/\\/[example]\\/' => ['scheme' => 'file', 'host' => null], + 'http:\\/\\/user:pass@\\/' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/foo:-80\\/' => ['scheme' => 'http', 'host' => null], + 'http:\\/:@\\/www.example.com' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/user@\\/www.example.com' => ['scheme' => 'http', 'host' => null], + 'http:@\\/www.example.com' => ['scheme' => 'http', 'host' => null], + 'http:\\/@\\/www.example.com' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/@\\/www.example.com' => ['scheme' => 'http', 'host' => null], + 'https:@\\/www.example.com' => ['scheme' => 'https', 'host' => null], + 'http:a:b@\\/www.example.com' => ['scheme' => 'http', 'host' => null], + 'http:\\/a:b@\\/www.example.com' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/a:b@\\/www.example.com' => ['scheme' => 'http', 'host' => null], + 'http::@\\/www.example.com' => ['scheme' => 'http', 'host' => null], + 'http:@:www.example.com' => ['scheme' => 'http', 'host' => null], + 'http:\\/@:www.example.com' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/@:www.example.com' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/example example.com' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/Goo%20 goo%7C|.com' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/[]' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/[:]' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/GOO\\u00a0\\u3000goo.com' => 
['scheme' => 'http', 'host' => null], + 'http:\\/\\/\\ufdd0zyx.com' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/%ef%b7%90zyx.com' => ['scheme' => 'http', 'host' => null], + 'https:\\/\\/\\ufffd' => ['scheme' => 'https', 'host' => null], + 'https:\\/\\/%EF%BF%BD' => ['scheme' => 'https', 'host' => null], + 'http:\\/\\/\\uff05\\uff14\\uff11.com' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/%ef%bc%85%ef%bc%94%ef%bc%91.com' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/\\uff05\\uff10\\uff10.com' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/%ef%bc%85%ef%bc%90%ef%bc%90.com' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/%zz%66%a.com' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/%25' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/hello%00' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/192.168.0.257' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/%3g%78%63%30%2e%30%32%35%30%2E.01' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/192.168.0.1 hello' => ['scheme' => 'http', 'host' => null], + 'https:\\/\\/x x:12' => ['scheme' => 'https', 'host' => null], + 'http:\\/\\/[www.google.com]\\/' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/[google.com]' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/[::1.2.3.4x]' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/[::1.2.3.]' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/[::1.2.]' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/[::1.]' => ['scheme' => 'http', 'host' => null], + '..\\/i' => ['scheme' => null, 'host' => null], + '\\/i' => ['scheme' => null, 'host' => null], + 'sc:\\/\\/\\u0000\\/' => ['scheme' => 'sc', 'host' => null], + 'sc:\\/\\/ \\/' => ['scheme' => 'sc', 'host' => null], + 'sc:\\/\\/@\\/' => ['scheme' => 'sc', 'host' => null], + 'sc:\\/\\/te@s:t@\\/' => ['scheme' => 'sc', 'host' => null], + 'sc:\\/\\/:\\/' => ['scheme' => 'sc', 'host' => null], + 'sc:\\/\\/:12\\/' => ['scheme' => 'sc', 
'host' => null], + 'sc:\\/\\/[\\/' => ['scheme' => 'sc', 'host' => null], + 'sc:\\/\\/\\\\/' => ['scheme' => 'sc', 'host' => null], + 'sc:\\/\\/]\\/' => ['scheme' => 'sc', 'host' => null], + 'ftp:\\/\\/example.com%80\\/' => ['scheme' => 'ftp', 'host' => null], + 'ftp:\\/\\/example.com%A0\\/' => ['scheme' => 'ftp', 'host' => null], + 'https:\\/\\/example.com%80\\/' => ['scheme' => 'https', 'host' => null], + 'https:\\/\\/example.com%A0\\/' => ['scheme' => 'https', 'host' => null], + 'http:\\/\\/10000000000' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/4294967296' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/0xffffffff1' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/256.256.256.256' => ['scheme' => 'http', 'host' => null], + 'https:\\/\\/0x100000000\\/test' => ['scheme' => 'https', 'host' => null], + 'https:\\/\\/256.0.0.1\\/test' => ['scheme' => 'https', 'host' => null], + 'http:\\/\\/[0:1:2:3:4:5:6:7:8]' => ['scheme' => 'http', 'host' => null], + 'https:\\/\\/[0::0::0]' => ['scheme' => 'https', 'host' => null], + 'https:\\/\\/[0:.0]' => ['scheme' => 'https', 'host' => null], + 'https:\\/\\/[0:0:]' => ['scheme' => 'https', 'host' => null], + 'https:\\/\\/[0:1:2:3:4:5:6:7.0.0.0.1]' => ['scheme' => 'https', 'host' => null], + 'https:\\/\\/[0:1.00.0.0.0]' => ['scheme' => 'https', 'host' => null], + 'https:\\/\\/[0:1.290.0.0.0]' => ['scheme' => 'https', 'host' => null], + 'https:\\/\\/[0:1.23.23]' => ['scheme' => 'https', 'host' => null], + 'http:\\/\\/?' 
=> ['scheme' => 'http', 'host' => null], + 'http:\\/\\/#' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/f:4294967377\\/c' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/f:18446744073709551697\\/c' => ['scheme' => 'http', 'host' => null], + 'http:\\/\\/f:340282366920938463463374607431768211537\\/c' => ['scheme' => 'http', 'host' => null], + 'non-special:\\/\\/[:80\\/' => ['scheme' => 'non-special', 'host' => null], + 'http:\\/\\/[::127.0.0.0.1]' => ['scheme' => 'http', 'host' => null], + 'a' => ['scheme' => null, 'host' => null], + 'a\\/' => ['scheme' => null, 'host' => null], + 'a\\/\\/' => ['scheme' => null, 'host' => null], + 'test-a-colon.html' => ['scheme' => null, 'host' => null], + 'test-a-colon-b.html' => ['scheme' => null, 'host' => null], + ]; + + foreach ($urls as $url => $expected) { + yield $url => [$url, $expected]; + } + } +} diff --git a/src/Symfony/Component/HtmlSanitizer/TextSanitizer/StringSanitizer.php b/src/Symfony/Component/HtmlSanitizer/TextSanitizer/StringSanitizer.php new file mode 100644 index 0000000000000..a291c622a136e --- /dev/null +++ b/src/Symfony/Component/HtmlSanitizer/TextSanitizer/StringSanitizer.php @@ -0,0 +1,82 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
+ */
+
+namespace Symfony\Component\HtmlSanitizer\TextSanitizer;
+
+/**
+ * String helpers used while sanitizing: ASCII lowercasing and HTML entity encoding.
+ *
+ * @internal
+ */
+final class StringSanitizer
+{
+    /**
+     * strtr() translation pair (from, to). Only the 26 ASCII letters are listed,
+     * so any other byte of the input is left untouched.
+     */
+    private const LOWERCASE = [
+        'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
+        'abcdefghijklmnopqrstuvwxyz',
+    ];
+
+    /**
+     * str_replace() pairs applied on the output of htmlspecialchars():
+     * index 0 holds the search strings, index 1 the replacement at the same position.
+     */
+    private const REPLACEMENTS = [
+        [
+            // "&#34;" is shorter than "&quot;"
+            '&quot;',
+
+            // Fix several potential issues in how browsers interpret attributes values
+            '+',
+            '=',
+            '@',
+            '`',
+
+            // Some DB engines will transform UTF8 full-width characters to their classical version
+            // if the data is saved in a non-UTF8 field
+            '＜',
+            '＞',
+            '＋',
+            '＝',
+            '＠',
+            '｀',
+        ],
+        [
+            '&#34;',
+
+            '&#43;',
+            '&#61;',
+            '&#64;',
+            '&#96;',
+
+            '&#xFF1C;',
+            '&#xFF1E;',
+            '&#xFF0B;',
+            '&#xFF1D;',
+            '&#xFF20;',
+            '&#xFF40;',
+        ],
+    ];
+
+    /**
+     * Applies a transformation to lowercase following W3C HTML Standard.
+     *
+     * Only ASCII uppercase letters are converted; every other character is returned as is.
+     *
+     * @see https://w3c.github.io/html-reference/terminology.html#case-insensitive
+     */
+    public static function htmlLower(string $string): string
+    {
+        return strtr($string, self::LOWERCASE[0], self::LOWERCASE[1]);
+    }
+
+    /**
+     * Encodes the HTML entities in the given string for safe injection in a document's DOM.
+     *
+     * The string first goes through htmlspecialchars() (quotes included, invalid
+     * sequences substituted), then the pairs of self::REPLACEMENTS are applied in order.
+     */
+    public static function encodeHtmlEntities(string $string): string
+    {
+        return str_replace(
+            self::REPLACEMENTS[0],
+            self::REPLACEMENTS[1],
+            htmlspecialchars($string, \ENT_QUOTES | \ENT_SUBSTITUTE, 'UTF-8')
+        );
+    }
+}
diff --git a/src/Symfony/Component/HtmlSanitizer/TextSanitizer/UrlSanitizer.php b/src/Symfony/Component/HtmlSanitizer/TextSanitizer/UrlSanitizer.php
new file mode 100644
index 0000000000000..c4643f7b24635
--- /dev/null
+++ b/src/Symfony/Component/HtmlSanitizer/TextSanitizer/UrlSanitizer.php
@@ -0,0 +1,136 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\Component\HtmlSanitizer\TextSanitizer;
+
+use League\Uri\Exceptions\SyntaxError;
+use League\Uri\UriString;
+
+/**
+ * @internal
+ */
+final class UrlSanitizer
+{
+    /**
+     * Sanitizes a given URL string.
+     *
+     * In addition to ensuring $input is a valid URL, this sanitizer checks that:
+     *   * the URL's host is allowed ;
+     *   * the URL's scheme is allowed ;
+     *   * the URL is allowed to be relative if it is ;
+     *
+     * It also transforms the URL to HTTPS if requested.
+     *
+     * @param string|null       $input          The URL to sanitize; any falsy value (null, '' and '0') is rejected
+     * @param list<string>|null $allowedSchemes Accepted schemes, or null to accept any scheme
+     * @param bool              $forceHttps     Whether an "http" scheme is rewritten to "https"
+     * @param list<string>|null $allowedHosts   Accepted hosts (their subdomains match too), or null to accept any host
+     * @param bool              $allowRelative  Whether URLs without scheme and/or host are accepted
+     *
+     * @return string|null The rebuilt URL, or null when the input is malformed or not allowed
+     */
+    public static function sanitize(?string $input, ?array $allowedSchemes = null, bool $forceHttps = false, ?array $allowedHosts = null, bool $allowRelative = false): ?string
+    {
+        if (!$input) {
+            return null;
+        }
+
+        // Malformed URL (parse() returns null on syntax errors)
+        if (!$url = self::parse($input)) {
+            return null;
+        }
+
+        // No scheme and relative not allowed
+        if (!$allowRelative && !$url['scheme']) {
+            return null;
+        }
+
+        // Forbidden scheme
+        if ($url['scheme'] && null !== $allowedSchemes && !\in_array($url['scheme'], $allowedSchemes, true)) {
+            return null;
+        }
+
+        // If the scheme used is not supposed to have a host, do not check the host
+        if (!self::isHostlessScheme($url['scheme'])) {
+            // No host and relative not allowed
+            if (!$allowRelative && !$url['host']) {
+                return null;
+            }
+
+            // Forbidden host
+            if ($url['host'] && null !== $allowedHosts && !self::isAllowedHost($url['host'], $allowedHosts)) {
+                return null;
+            }
+        }
+
+        // Force HTTPS
+        if ($forceHttps && 'http' === $url['scheme']) {
+            $url['scheme'] = 'https';
+        }
+
+        return UriString::build($url);
+    }
+
+    /**
+     * Parses a given URL and returns an array of its components.
+     *
+     * @return null|array{
+     *     scheme:?string,
+     *     user:?string,
+     *     pass:?string,
+     *     host:?string,
+     *     port:?int,
+     *     path:string,
+     *     query:?string,
+     *     fragment:?string
+     * }
+     */
+    public static function parse(string $url): ?array
+    {
+        if (!$url) {
+            return null;
+        }
+
+        try {
+            return UriString::parse($url);
+        } catch (SyntaxError) {
+            // A URL the parser rejects is reported as "no result" rather than as an error
+            return null;
+        }
+    }
+
+    /**
+     * Tells whether the host check must be skipped for the given scheme.
+     */
+    private static function isHostlessScheme(?string $scheme): bool
+    {
+        return \in_array($scheme, ['blob', 'chrome', 'data', 'file', 'geo', 'mailto', 'maps', 'tel', 'view-source'], true);
+    }
+
+    /**
+     * Tells whether $host is one of the allowed hosts or a subdomain of one of them.
+     *
+     * Hosts are split on "." and compared label by label starting from the right,
+     * so "www.example.com" matches the allowed host "example.com".
+     */
+    private static function isAllowedHost(?string $host, array $allowedHosts): bool
+    {
+        if (null === $host) {
+            return \in_array(null, $allowedHosts, true);
+        }
+
+        $parts = array_reverse(explode('.', $host));
+
+        foreach ($allowedHosts as $allowedHost) {
+            if (self::matchAllowedHostParts($parts, array_reverse(explode('.', $allowedHost)))) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Checks that every label of the trusted host is found at the same position in the URI host.
+     *
+     * Both lists are expected in reversed order (top-level label first).
+     */
+    private static function matchAllowedHostParts(array $uriParts, array $trustedParts): bool
+    {
+        foreach ($trustedParts as $key => $trustedPart) {
+            // The URI host may have fewer labels than the trusted one ("com" vs "example.com"):
+            // a missing label is a mismatch, not an undefined offset to read.
+            if (($uriParts[$key] ?? null) !== $trustedPart) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+}
diff --git a/src/Symfony/Component/HtmlSanitizer/Visitor/AttributeSanitizer/AttributeSanitizerInterface.php b/src/Symfony/Component/HtmlSanitizer/Visitor/AttributeSanitizer/AttributeSanitizerInterface.php
new file mode 100644
index 0000000000000..c4daa1d17fbe3
--- /dev/null
+++ b/src/Symfony/Component/HtmlSanitizer/Visitor/AttributeSanitizer/AttributeSanitizerInterface.php
@@ -0,0 +1,43 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\Component\HtmlSanitizer\Visitor\AttributeSanitizer;
+
+use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig;
+
+/**
+ * Implements attribute-specific sanitization logic.
+ *
+ * @author Titouan Galopin <galopintitouan@gmail.com>
+ *
+ * @experimental
+ */
+interface AttributeSanitizerInterface
+{
+    /**
+     * Returns the list of element names supported, or null to support all elements.
+     *
+     * @return list<string>|null
+     */
+    public function getSupportedElements(): ?array;
+
+    /**
+     * Returns the list of attributes names supported, or null to support all attributes.
+     *
+     * @return list<string>|null
+     */
+    public function getSupportedAttributes(): ?array;
+
+    /**
+     * Returns the sanitized value of a given attribute for the given element.
+     *
+     * A null return value is stored as such on the sanitized node, and
+     * Node::renderAttributes() leaves out attributes whose value is null.
+     */
+    public function sanitizeAttribute(string $element, string $attribute, string $value, HtmlSanitizerConfig $config): ?string;
+}
diff --git a/src/Symfony/Component/HtmlSanitizer/Visitor/AttributeSanitizer/UrlAttributeSanitizer.php b/src/Symfony/Component/HtmlSanitizer/Visitor/AttributeSanitizer/UrlAttributeSanitizer.php
new file mode 100644
index 0000000000000..2d5c5f0b975db
--- /dev/null
+++ b/src/Symfony/Component/HtmlSanitizer/Visitor/AttributeSanitizer/UrlAttributeSanitizer.php
@@ -0,0 +1,53 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\Component\HtmlSanitizer\Visitor\AttributeSanitizer;
+
+use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig;
+use Symfony\Component\HtmlSanitizer\TextSanitizer\UrlSanitizer;
+
+/**
+ * Sanitizes the attributes that carry a URL, using the link rules of the
+ * configuration for <a> elements and its media rules for every other element.
+ *
+ * @experimental
+ */
+final class UrlAttributeSanitizer implements AttributeSanitizerInterface
+{
+    /**
+     * URL attributes may appear on any element, hence no element restriction.
+     */
+    public function getSupportedElements(): ?array
+    {
+        return null;
+    }
+
+    /**
+     * Names of the attributes whose value is treated as a URL.
+     */
+    public function getSupportedAttributes(): ?array
+    {
+        return ['src', 'href', 'lowsrc', 'background', 'ping'];
+    }
+
+    /**
+     * Runs the value through UrlSanitizer with the link or media settings of $config.
+     *
+     * @return string|null The sanitized URL, or null when UrlSanitizer rejects it
+     */
+    public function sanitizeAttribute(string $element, string $attribute, string $value, HtmlSanitizerConfig $config): ?string
+    {
+        // Only anchors are links; any other element carrying a URL is handled as media
+        $isLink = 'a' === $element;
+
+        return UrlSanitizer::sanitize(
+            $value,
+            $isLink ? $config->getAllowedLinkSchemes() : $config->getAllowedMediaSchemes(),
+            $config->getForceHttpsUrls(),
+            $isLink ? $config->getAllowedLinkHosts() : $config->getAllowedMediaHosts(),
+            $isLink ? $config->getAllowRelativeLinks() : $config->getAllowRelativeMedias(),
+        );
+    }
+}
diff --git a/src/Symfony/Component/HtmlSanitizer/Visitor/DomVisitor.php b/src/Symfony/Component/HtmlSanitizer/Visitor/DomVisitor.php
new file mode 100644
index 0000000000000..30a8f0e2bbe9b
--- /dev/null
+++ b/src/Symfony/Component/HtmlSanitizer/Visitor/DomVisitor.php
@@ -0,0 +1,176 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\Component\HtmlSanitizer\Visitor;
+
+use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig;
+use Symfony\Component\HtmlSanitizer\TextSanitizer\StringSanitizer;
+use Symfony\Component\HtmlSanitizer\Visitor\AttributeSanitizer\AttributeSanitizerInterface;
+use Symfony\Component\HtmlSanitizer\Visitor\Model\Cursor;
+use Symfony\Component\HtmlSanitizer\Visitor\Node\BlockedNode;
+use Symfony\Component\HtmlSanitizer\Visitor\Node\DocumentNode;
+use Symfony\Component\HtmlSanitizer\Visitor\Node\Node;
+use Symfony\Component\HtmlSanitizer\Visitor\Node\NodeInterface;
+use Symfony\Component\HtmlSanitizer\Visitor\Node\TextNode;
+
+/**
+ * Iterates over the parsed DOM tree to build the sanitized tree.
+ *
+ * The DomVisitor iterates over the parsed DOM tree, visits its nodes and builds
+ * a sanitized tree with their attributes and content.
+ *
+ * @author Titouan Galopin <galopintitouan@gmail.com>
+ *
+ * @internal
+ */
+final class DomVisitor
+{
+    private HtmlSanitizerConfig $config;
+
+    /**
+     * Registry of allowed/blocked elements:
+     *   * If an element is present as a key and contains an array, the element should be allowed
+     *     and the array is the list of allowed attributes.
+     *   * If an element is present as a key and contains "false", the element should be blocked.
+     *   * If an element is not present as a key, the element should be dropped.
+     *
+     * @var array<string, false|array<string, bool>>
+     */
+    private array $elementsConfig;
+
+    /**
+     * Registry of attributes to forcefully set on nodes, indexed by element and attribute.
+     *
+     * @var array<string, array<string, string>>
+     */
+    private array $forcedAttributes;
+
+    /**
+     * Registry of attributes sanitizers indexed by element name and attribute name for
+     * faster sanitization. The key "*" stands for "any element" or "any attribute".
+     *
+     * @var array<string, array<string, list<AttributeSanitizerInterface>>>
+     */
+    private array $attributeSanitizers = [];
+
+    /**
+     * @param array<string, false|array<string, bool>> $elementsConfig
+     */
+    public function __construct(HtmlSanitizerConfig $config, array $elementsConfig)
+    {
+        $this->config = $config;
+        $this->elementsConfig = $elementsConfig;
+        $this->forcedAttributes = $config->getForcedAttributes();
+
+        // A sanitizer declaring no element (or attribute) restriction is filed under "*"
+        foreach ($config->getAttributeSanitizers() as $attributeSanitizer) {
+            foreach ($attributeSanitizer->getSupportedElements() ?? ['*'] as $element) {
+                foreach ($attributeSanitizer->getSupportedAttributes() ?? ['*'] as $attribute) {
+                    $this->attributeSanitizers[$element][$attribute][] = $attributeSanitizer;
+                }
+            }
+        }
+    }
+
+    /**
+     * Builds the sanitized tree for the given fragment and returns its root node.
+     */
+    public function visit(\DOMDocumentFragment $domNode): ?NodeInterface
+    {
+        $cursor = new Cursor(new DocumentNode());
+        $this->visitChildren($domNode, $cursor);
+
+        return $cursor->node;
+    }
+
+    /**
+     * Visits one DOM node: drops it, or enters it, visits its children and moves the cursor back up.
+     */
+    private function visitNode(\DOMNode $domNode, Cursor $cursor): void
+    {
+        $nodeName = StringSanitizer::htmlLower($domNode->nodeName);
+
+        // Element should be dropped, including its children
+        if (!\array_key_exists($nodeName, $this->elementsConfig)) {
+            return;
+        }
+
+        // Otherwise, visit recursively
+        $this->enterNode($nodeName, $domNode, $cursor);
+        $this->visitChildren($domNode, $cursor);
+        $cursor->node = $cursor->node->getParent();
+    }
+
+    /**
+     * Creates the sanitized counterpart of an allowed or blocked element and moves the cursor onto it.
+     */
+    private function enterNode(string $domNodeName, \DOMNode $domNode, Cursor $cursor): void
+    {
+        // Element should be blocked, retaining its children
+        if (false === $this->elementsConfig[$domNodeName]) {
+            $node = new BlockedNode($cursor->node);
+
+            $cursor->node->addChild($node);
+            $cursor->node = $node;
+
+            return;
+        }
+
+        $forcedAttributes = $this->forcedAttributes[$domNodeName] ?? [];
+
+        // Otherwise create the node. Node::setAttribute() keeps the first value it receives for
+        // a given name, so attributes that are about to be forced must not be copied from the
+        // input: the user-supplied value would otherwise win over the configured one.
+        $node = new Node($cursor->node, $domNodeName);
+        $this->setAttributes($domNodeName, $domNode, $node, array_diff_key($this->elementsConfig[$domNodeName], $forcedAttributes));
+
+        // Force configured attributes
+        foreach ($forcedAttributes as $attribute => $value) {
+            $node->setAttribute($attribute, $value);
+        }
+
+        $cursor->node->addChild($node);
+        $cursor->node = $node;
+    }
+
+    /**
+     * Adds the children of a DOM node to the node currently under the cursor.
+     */
+    private function visitChildren(\DOMNode $domNode, Cursor $cursor): void
+    {
+        /** @var \DOMNode $child */
+        foreach ($domNode->childNodes ?? [] as $child) {
+            if ('#text' === $child->nodeName) {
+                // Add text directly for performance
+                $cursor->node->addChild(new TextNode($cursor->node, $child->nodeValue));
+            } elseif (!$child instanceof \DOMText) {
+                // Otherwise continue the visit recursively
+                // Ignore comments for security reasons (interpreted differently by browsers)
+                $this->visitNode($child, $cursor);
+            }
+        }
+    }
+
+    /**
+     * Set attributes from a DOM node to a sanitized node.
+     *
+     * Only attributes whose lowercased name is a key of $allowedAttributes are copied,
+     * after their value went through the matching attribute sanitizers.
+     */
+    private function setAttributes(string $domNodeName, \DOMNode $domNode, Node $node, array $allowedAttributes = []): void
+    {
+        /** @var \DOMAttr $attribute */
+        foreach ($domNode->attributes ?? [] as $attribute) {
+            $name = StringSanitizer::htmlLower($attribute->name);
+
+            if (!isset($allowedAttributes[$name])) {
+                continue;
+            }
+
+            $value = $attribute->value;
+
+            // Sanitize the attribute value if there are attribute sanitizers for it,
+            // from the most specific registration to the catch-all one
+            $attributeSanitizers = array_merge(
+                $this->attributeSanitizers[$domNodeName][$name] ?? [],
+                $this->attributeSanitizers['*'][$name] ?? [],
+                $this->attributeSanitizers[$domNodeName]['*'] ?? [],
+                $this->attributeSanitizers['*']['*'] ?? [],
+            );
+
+            foreach ($attributeSanitizers as $sanitizer) {
+                $value = $sanitizer->sanitizeAttribute($domNodeName, $name, $value, $this->config);
+
+                // A sanitizer rejected the value: the next one only accepts a string, stop here
+                if (null === $value) {
+                    break;
+                }
+            }
+
+            $node->setAttribute($name, $value);
+        }
+    }
+}
diff --git a/src/Symfony/Component/HtmlSanitizer/Visitor/Model/Cursor.php b/src/Symfony/Component/HtmlSanitizer/Visitor/Model/Cursor.php
new file mode 100644
index 0000000000000..5214c09b77d20
--- /dev/null
+++ b/src/Symfony/Component/HtmlSanitizer/Visitor/Model/Cursor.php
@@ -0,0 +1,26 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\Component\HtmlSanitizer\Visitor\Model;
+
+use Symfony\Component\HtmlSanitizer\Visitor\Node\NodeInterface;
+
+/**
+ * Mutable pointer to the sanitized node currently being built by the visitor.
+ *
+ * @author Titouan Galopin <galopintitouan@gmail.com>
+ *
+ * @internal
+ */
+final class Cursor
+{
+    public function __construct(public ?NodeInterface $node)
+    {
+    }
+}
diff --git a/src/Symfony/Component/HtmlSanitizer/Visitor/Node/BlockedNode.php b/src/Symfony/Component/HtmlSanitizer/Visitor/Node/BlockedNode.php
new file mode 100644
index 0000000000000..d438313d4ec76
--- /dev/null
+++ b/src/Symfony/Component/HtmlSanitizer/Visitor/Node/BlockedNode.php
@@ -0,0 +1,48 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\Component\HtmlSanitizer\Visitor\Node;
+
+/**
+ * Sanitized counterpart of a blocked element: it renders no tag of its own,
+ * only the output of its children.
+ *
+ * @author Titouan Galopin <galopintitouan@gmail.com>
+ *
+ * @experimental
+ */
+final class BlockedNode implements NodeInterface
+{
+    /** @var list<NodeInterface> */
+    private array $children = [];
+
+    public function __construct(private NodeInterface $parentNode)
+    {
+    }
+
+    public function addChild(NodeInterface $node): void
+    {
+        $this->children[] = $node;
+    }
+
+    public function getParent(): ?NodeInterface
+    {
+        return $this->parentNode;
+    }
+
+    /**
+     * Concatenates the rendering of every child, in insertion order.
+     */
+    public function render(): string
+    {
+        return implode('', array_map(
+            static fn (NodeInterface $child): string => $child->render(),
+            $this->children,
+        ));
+    }
+}
diff --git a/src/Symfony/Component/HtmlSanitizer/Visitor/Node/DocumentNode.php b/src/Symfony/Component/HtmlSanitizer/Visitor/Node/DocumentNode.php
new file mode 100644
index 0000000000000..d5ef5363015e7
--- /dev/null
+++ b/src/Symfony/Component/HtmlSanitizer/Visitor/Node/DocumentNode.php
@@ -0,0 +1,42 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\Component\HtmlSanitizer\Visitor\Node;
+
+/**
+ * Root of the sanitized tree: it has no parent and renders only its children.
+ *
+ * @author Titouan Galopin <galopintitouan@gmail.com>
+ *
+ * @experimental
+ */
+final class DocumentNode implements NodeInterface
+{
+    private array $children = [];
+
+    public function addChild(NodeInterface $node): void
+    {
+        $this->children[] = $node;
+    }
+
+    public function getParent(): ?NodeInterface
+    {
+        return null;
+    }
+
+    public function render(): string
+    {
+        $rendered = '';
+        foreach ($this->children as $child) {
+            $rendered .= $child->render();
+        }
+
+        return $rendered;
+    }
+}
diff --git a/src/Symfony/Component/HtmlSanitizer/Visitor/Node/Node.php b/src/Symfony/Component/HtmlSanitizer/Visitor/Node/Node.php
new file mode 100644
index 0000000000000..76838028dbc0d
--- /dev/null
+++ b/src/Symfony/Component/HtmlSanitizer/Visitor/Node/Node.php
@@ -0,0 +1,106 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\Component\HtmlSanitizer\Visitor\Node;
+
+use Symfony\Component\HtmlSanitizer\TextSanitizer\StringSanitizer;
+
+/**
+ * Sanitized counterpart of an allowed element: a tag name, its attributes and its children.
+ *
+ * @author Titouan Galopin <galopintitouan@gmail.com>
+ *
+ * @experimental
+ */
+final class Node implements NodeInterface
+{
+    private NodeInterface $parent;
+    private string $tagName;
+
+    /** @var array<string, string|null> */
+    private array $attributes = [];
+
+    /** @var list<NodeInterface> */
+    private array $children = [];
+
+    public function __construct(NodeInterface $parent, string $tagName)
+    {
+        $this->parent = $parent;
+        $this->tagName = $tagName;
+    }
+
+    public function getParent(): ?NodeInterface
+    {
+        return $this->parent;
+    }
+
+    /**
+     * Returns the value stored for the attribute, or null when it is unset or was stored as null.
+     */
+    public function getAttribute(string $name): ?string
+    {
+        return $this->attributes[$name] ?? null;
+    }
+
+    /**
+     * Stores the attribute unless a value (even null) was already stored under that name.
+     */
+    public function setAttribute(string $name, ?string $value): void
+    {
+        // Always use only the first declaration (ease sanitization)
+        if (!\array_key_exists($name, $this->attributes)) {
+            $this->attributes[$name] = $value;
+        }
+    }
+
+    public function addChild(NodeInterface $node): void
+    {
+        $this->children[] = $node;
+    }
+
+    /**
+     * Renders the element: self-closed when it has no children, otherwise as an
+     * opening tag, the rendered children and the matching closing tag.
+     */
+    public function render(): string
+    {
+        if (!$this->children) {
+            return '<'.$this->tagName.$this->renderAttributes().' />';
+        }
+
+        $rendered = '<'.$this->tagName.$this->renderAttributes().'>';
+        foreach ($this->children as $child) {
+            $rendered .= $child->render();
+        }
+
+        return $rendered.'</'.$this->tagName.'>';
+    }
+
+    /**
+     * Renders the attributes as ` name="value"` pairs, with a leading space, or '' when there are none.
+     */
+    private function renderAttributes(): string
+    {
+        $rendered = [];
+        foreach ($this->attributes as $name => $value) {
+            if (null === $value) {
+                // Attribute should be removed as a sanitizer found suspect data inside
+                continue;
+            }
+
+            $attr = StringSanitizer::encodeHtmlEntities($name);
+
+            if ('' !== $value) {
+                // In quirks mode, IE8 does a poor job producing innerHTML values.
+                // If JavaScript does:
+                //      nodeA.innerHTML = nodeB.innerHTML;
+                // and nodeB contains (or even if ` was encoded properly):
+                //      <div attr="``foo=bar">
+                // then IE8 will produce:
+                //      <div attr=``foo=bar>
+                // as the value of nodeB.innerHTML and assign it to nodeA.
+                // IE8's HTML parser treats `` as a blank attribute value and foo=bar becomes a separate attribute.
+                // Adding a space at the end of the attribute prevents this by forcing IE8 to put double
+                // quotes around the attribute when computing nodeB.innerHTML.
+                if (str_contains($value, '`')) {
+                    $value .= ' ';
+                }
+
+                $attr .= '="'.StringSanitizer::encodeHtmlEntities($value).'"';
+            }
+
+            $rendered[] = $attr;
+        }
+
+        return $rendered ? ' '.implode(' ', $rendered) : '';
+    }
+}
diff --git a/src/Symfony/Component/HtmlSanitizer/Visitor/Node/NodeInterface.php b/src/Symfony/Component/HtmlSanitizer/Visitor/Node/NodeInterface.php
new file mode 100644
index 0000000000000..27d9da7ed97ac
--- /dev/null
+++ b/src/Symfony/Component/HtmlSanitizer/Visitor/Node/NodeInterface.php
@@ -0,0 +1,39 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\Component\HtmlSanitizer\Visitor\Node;
+
+/**
+ * Represents the sanitized version of a DOM node in the sanitized tree.
+ *
+ * Once the sanitization is done, nodes are rendered into the final output string.
+ *
+ * @author Titouan Galopin <galopintitouan@gmail.com>
+ *
+ * @experimental
+ */
+interface NodeInterface
+{
+    /**
+     * Add a child node to this node.
+     */
+    public function addChild(self $node): void;
+
+    /**
+     * Return the parent node of this node, or null if it has no parent node.
+     */
+    public function getParent(): ?self;
+
+    /**
+     * Render this node as a string, recursively rendering its children as well.
+     */
+    public function render(): string;
+}
diff --git a/src/Symfony/Component/HtmlSanitizer/Visitor/Node/TextNode.php b/src/Symfony/Component/HtmlSanitizer/Visitor/Node/TextNode.php
new file mode 100644
index 0000000000000..f06b7ccdf47d1
--- /dev/null
+++ b/src/Symfony/Component/HtmlSanitizer/Visitor/Node/TextNode.php
@@ -0,0 +1,55 @@
+<?php
+
+/*
+ * This file is part of the Symfony package.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *
+ * For the full copyright and license information, please view the LICENSE
+ * file that was distributed with this source code.
+ */
+
+namespace Symfony\Component\HtmlSanitizer\Visitor\Node;
+
+use Symfony\Component\HtmlSanitizer\TextSanitizer\StringSanitizer;
+
+/**
+ * Leaf of the sanitized tree: holds a piece of text that is passed through
+ * StringSanitizer::encodeHtmlEntities() when rendered.
+ *
+ * @author Titouan Galopin <galopintitouan@gmail.com>
+ *
+ * @experimental
+ */
+final class TextNode implements NodeInterface
+{
+    public function __construct(private NodeInterface $parentNode, private string $text)
+    {
+    }
+
+    /**
+     * Text nodes are leaves, so attaching a child is always rejected.
+     *
+     * @throws \LogicException on every call
+     */
+    public function addChild(NodeInterface $node): void
+    {
+        throw new \LogicException('Text nodes cannot have children.');
+    }
+
+    /**
+     * Return the node given to the constructor as parent.
+     */
+    public function getParent(): ?NodeInterface
+    {
+        return $this->parentNode;
+    }
+
+    /**
+     * Return the text encoded by StringSanitizer::encodeHtmlEntities().
+     */
+    public function render(): string
+    {
+        return StringSanitizer::encodeHtmlEntities($this->text);
+    }
+}
diff --git a/src/Symfony/Component/HtmlSanitizer/composer.json b/src/Symfony/Component/HtmlSanitizer/composer.json
new file mode 100644
index 0000000000000..052b480fd1ced
--- /dev/null
+++ b/src/Symfony/Component/HtmlSanitizer/composer.json
@@ -0,0 +1,31 @@
+{
+    "name": "symfony/html-sanitizer",
+    "type": "library",
+    "description": "Provides an object-oriented API to sanitize untrusted HTML input for safe insertion into a document's DOM.",
+    "keywords": ["html", "sanitizer", "purifier"],
+    "homepage": "https://symfony.com",
+    "license": "MIT",
+    "authors": [
+        {
+            "name": "Titouan Galopin",
+            "email": "galopintitouan@gmail.com"
+        },
+        {
+            "name": "Symfony Community",
+            "homepage": "https://symfony.com/contributors"
+        }
+    ],
+    "require": {
+        "php": ">=8.0.2",
+        "ext-dom": "*",
+        "league/uri": "^6.5",
+        "masterminds/html5": "^2.4"
+    },
+    "autoload": {
+
"psr-4": { "Symfony\\Component\\HtmlSanitizer\\": "" }, + "exclude-from-classmap": [ + "/Tests/" + ] + }, + "minimum-stability": "dev" +} diff --git a/src/Symfony/Component/HtmlSanitizer/phpunit.xml.dist b/src/Symfony/Component/HtmlSanitizer/phpunit.xml.dist new file mode 100644 index 0000000000000..bb03155b35ae2 --- /dev/null +++ b/src/Symfony/Component/HtmlSanitizer/phpunit.xml.dist @@ -0,0 +1,30 @@ + + + + + + + + + + ./Tests/ + + + + + + ./ + + + ./Tests + ./vendor + + +