From 052ec95af96981e54695d2f23c0cee4d9cf6858a Mon Sep 17 00:00:00 2001 From: Arjen van der Meijden Date: Thu, 31 Mar 2016 13:58:26 +0200 Subject: [PATCH] [Routing] Replace previously added preg_match with preg_replace_callback --- .../Routing/Generator/UrlGenerator.php | 65 ++++++++++--------- 1 file changed, 34 insertions(+), 31 deletions(-) diff --git a/src/Symfony/Component/Routing/Generator/UrlGenerator.php b/src/Symfony/Component/Routing/Generator/UrlGenerator.php index 939ba5f3636eb..8c09ab98cd802 100644 --- a/src/Symfony/Component/Routing/Generator/UrlGenerator.php +++ b/src/Symfony/Component/Routing/Generator/UrlGenerator.php @@ -27,6 +27,30 @@ */ class UrlGenerator implements UrlGeneratorInterface, ConfigurableRequirementsInterface { + /** + * This regexp matches all characters that should be percent-encoded in paths for URLs generated by this class. + * + * PHP's rawurlencode() encodes all chars except "a-zA-Z0-9-._~" according to RFC 3986. But we want to allow some chars + * to be used in their literal form (reasons below). Other chars inside the path must of course be encoded, e.g. + * "?" and "#" (would be interpreted wrongly as query and fragment identifier), + * "'" and """ (are used as delimiters in HTML). + * + * These characters are left unencoded in addition to rawurlencode()'s list: + * - The slash '/' can be used to designate a hierarchical structure and we allow using it with this meaning, + * some webservers don't allow the slash in encoded form in the path for security reasons anyway + * see http://stackoverflow.com/questions/4069002/http-400-if-2f-part-of-get-url-in-jboss + * - The '@' and ':' are general delimiters in the URI specification but have only special meaning in the authority component, + * so they can safely be used in the path in unencoded form. 
+ * - The ';', ',', '=', '+', '!', '*', '|' are only sub-delimiters that have no predefined meaning and can therefore be used literally + * so URI producing applications can use these chars to delimit subcomponents in a path segment without being encoded for better readability. + * + * The regexp is an inverse of the characters above. It allows calling rawurlencode() only for specific characters, + * thus optimizing scenarios where there are no or very few such characters in URLs. + * + * @internal + */ + const PATH_UNSAFE_CHARACTER_REGEXP = '#[^-.~a-zA-Z0-9_/@:;,=+!*|]+#'; + /** * @var RouteCollection */ @@ -48,37 +72,11 @@ class UrlGenerator implements UrlGeneratorInterface, ConfigurableRequirementsInt protected $logger; /** - * This array defines the characters (besides alphanumeric ones) that will not be percent-encoded in the path segment of the generated URL. + * This array used to define the characters (besides alphanumeric ones) that will not be percent-encoded in the path segment of the generated URL. * - * PHP's rawurlencode() encodes all chars except "a-zA-Z0-9-._~" according to RFC 3986. But we want to allow some chars - * to be used in their literal form (reasons below). Other chars inside the path must of course be encoded, e.g. - * "?" and "#" (would be interpreted wrongly as query and fragment identifier), - * "'" and """ (are used as delimiters in HTML). 
- */ - protected $decodedChars = array( - // the slash can be used to designate a hierarchical structure and we want allow using it with this meaning - // some webservers don't allow the slash in encoded form in the path for security reasons anyway - // see http://stackoverflow.com/questions/4069002/http-400-if-2f-part-of-get-url-in-jboss - '%2F' => '/', - // the following chars are general delimiters in the URI specification but have only special meaning in the authority component - // so they can safely be used in the path in unencoded form - '%40' => '@', - '%3A' => ':', - // these chars are only sub-delimiters that have no predefined meaning and can therefore be used literally - // so URI producing applications can use these chars to delimit subcomponents in a path segment without being encoded for better readability - '%3B' => ';', - '%2C' => ',', - '%3D' => '=', - '%2B' => '+', - '%21' => '!', - '%2A' => '*', - '%7C' => '|', - ); - - /** - * @var string This regexp matches all characters that are not or should not be encoded by rawurlencode (see list in array above). + * @deprecated This array should not be used anymore. Changing it can result in url's that are incompatible with Symfony's url-matching, browsers and/or server software. */ - private $urlEncodingSkipRegexp = '#[^-.~a-zA-Z0-9_/@:;,=+!*|]#'; + protected $decodedChars = array(); /** * Constructor. 
@@ -187,9 +185,14 @@ protected function doGenerate($variables, $defaults, $requirements, $tokens, $pa if ('' === $url) { $url = '/'; - } elseif (preg_match($this->urlEncodingSkipRegexp, $url)) { - // the context base URL is already encoded (see Symfony\Component\HttpFoundation\Request) + } elseif (!empty($this->decodedChars)) { + @trigger_error('The class variable '.__CLASS__.'::$decodedChars is deprecated since version 2.7 and will be removed in 3.1.', E_USER_DEPRECATED); $url = strtr(rawurlencode($url), $this->decodedChars); + } else { + // the context base URL is already encoded (see Symfony\Component\HttpFoundation\Request) + $url = preg_replace_callback(self::PATH_UNSAFE_CHARACTER_REGEXP, function(array $matches){ + return rawurlencode($matches[0]); + }, $url); } // the path segments "." and ".." are interpreted as relative reference when resolving a URI; see http://tools.ietf.org/html/rfc3986#section-3.3