From 1f3e0d8d1614e76a0dfc8eb76fcc560937e51f73 Mon Sep 17 00:00:00 2001 From: Christian Flothmann Date: Sun, 30 Mar 2025 15:36:39 +0200 Subject: [PATCH] reject URLs with URL-encoded non UTF-8 characters in the host part --- .../Tests/TextSanitizer/UrlSanitizerTest.php | 6 +++--- .../HtmlSanitizer/TextSanitizer/UrlSanitizer.php | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/Symfony/Component/HtmlSanitizer/Tests/TextSanitizer/UrlSanitizerTest.php b/src/Symfony/Component/HtmlSanitizer/Tests/TextSanitizer/UrlSanitizerTest.php index 0d366b7b9848f..391895024e456 100644 --- a/src/Symfony/Component/HtmlSanitizer/Tests/TextSanitizer/UrlSanitizerTest.php +++ b/src/Symfony/Component/HtmlSanitizer/Tests/TextSanitizer/UrlSanitizerTest.php @@ -568,8 +568,8 @@ public static function provideParse(): iterable 'http://你好你好' => ['scheme' => 'http', 'host' => '你好你好'], 'https://faß.ExAmPlE/' => ['scheme' => 'https', 'host' => 'faß.ExAmPlE'], 'sc://faß.ExAmPlE/' => ['scheme' => 'sc', 'host' => 'faß.ExAmPlE'], - 'http://%30%78%63%30%2e%30%32%35%30.01' => ['scheme' => 'http', 'host' => '%30%78%63%30%2e%30%32%35%30.01'], - 'http://%30%78%63%30%2e%30%32%35%30.01%2e' => ['scheme' => 'http', 'host' => '%30%78%63%30%2e%30%32%35%30.01%2e'], + 'http://%30%78%63%30%2e%30%32%35%30.01' => null, + 'http://%30%78%63%30%2e%30%32%35%30.01%2e' => null, 'http://0Xc0.0250.01' => ['scheme' => 'http', 'host' => '0Xc0.0250.01'], 'http://./' => ['scheme' => 'http', 'host' => '.'], 'http://../' => ['scheme' => 'http', 'host' => '..'], @@ -689,7 +689,7 @@ public static function provideParse(): iterable 'urn:ietf:rfc:2648' => ['scheme' => 'urn', 'host' => null], 'tag:joe@example.org,2001:foo/bar' => ['scheme' => 'tag', 'host' => null], 'non-special://%E2%80%A0/' => ['scheme' => 'non-special', 'host' => '%E2%80%A0'], - 'non-special://H%4fSt/path' => ['scheme' => 'non-special', 'host' => 'H%4fSt'], + 'non-special://H%4fSt/path' => null, 'non-special://[1:2:0:0:5:0:0:0]/' => 
['scheme' => 'non-special', 'host' => '[1:2:0:0:5:0:0:0]'], 'non-special://[1:2:0:0:0:0:0:3]/' => ['scheme' => 'non-special', 'host' => '[1:2:0:0:0:0:0:3]'], 'non-special://[1:2::3]:80/' => ['scheme' => 'non-special', 'host' => '[1:2::3]'], diff --git a/src/Symfony/Component/HtmlSanitizer/TextSanitizer/UrlSanitizer.php b/src/Symfony/Component/HtmlSanitizer/TextSanitizer/UrlSanitizer.php index 0a65873d55577..9920ecd88da4a 100644 --- a/src/Symfony/Component/HtmlSanitizer/TextSanitizer/UrlSanitizer.php +++ b/src/Symfony/Component/HtmlSanitizer/TextSanitizer/UrlSanitizer.php @@ -100,6 +100,10 @@ public static function parse(string $url): ?array return null; } + if (isset($parsedUrl['host']) && self::decodeUnreservedCharacters($parsedUrl['host']) !== $parsedUrl['host']) { + return null; + } + return $parsedUrl; } catch (SyntaxError) { return null; @@ -139,4 +143,16 @@ private static function matchAllowedHostParts(array $uriParts, array $trustedPar return true; } + + /** + * Returns $host with every percent-encoded octet in %21-%2F and %30-%7F decoded (a wider set than the unreserved characters the name suggests) and all other sequences left untouched; parse() returns null for any host this call changes. Implementation borrowed from League\Uri\Encoder::decodeUnreservedCharacters(). + */ + private static function decodeUnreservedCharacters(string $host): string + { + return preg_replace_callback( + ',%(2[1-9A-Fa-f]|[3-7][0-9A-Fa-f]|61|62|64|65|66|7[AB]|5F),', + static fn (array $matches): string => rawurldecode($matches[0]), + $host + ); + } }