@@ -91,6 +91,16 @@ private static function compilePattern(Route $route, $pattern, $isHost)
9191 $ matches = array ();
9292 $ pos = 0 ;
9393 $ defaultSeparator = $ isHost ? '. ' : '/ ' ;
94+ $ useUtf8 = preg_match ('//u ' , $ pattern );
95+ $ needsUtf8 = $ route ->getOption ('utf8 ' );
96+
97+ if (!$ needsUtf8 && $ useUtf8 && preg_match ('/[\x80-\xFF]/ ' , $ pattern )) {
98+ $ needsUtf8 = true ;
99+ @trigger_error (sprintf ('Using UTF-8 route patterns without setting the "utf8" option is deprecated since Symfony 3.2 and will throw a LogicException in 4.0. Turn on the "utf8" route option for pattern "%s". ' , $ pattern ), E_USER_DEPRECATED );
100+ }
101+ if (!$ useUtf8 && $ needsUtf8 ) {
102+ throw new \LogicException (sprintf ('Cannot mix UTF-8 requirements with non-UTF-8 pattern "%s". ' , $ pattern ));
103+ }
94104
95105 // Match all variables enclosed in "{}" and iterate over them. But we only want to match the innermost variable
96106 // in case of nested "{}", e.g. {foo{bar}}. This is ensured because \w does not match "{" or "}" itself.
@@ -100,7 +110,15 @@ private static function compilePattern(Route $route, $pattern, $isHost)
100110 // get all static text preceding the current variable
101111 $ precedingText = substr ($ pattern , $ pos , $ match [0 ][1 ] - $ pos );
102112 $ pos = $ match [0 ][1 ] + strlen ($ match [0 ][0 ]);
103- $ precedingChar = strlen ($ precedingText ) > 0 ? substr ($ precedingText , -1 ) : '' ;
113+
114+ if (!strlen ($ precedingText )) {
115+ $ precedingChar = '' ;
116+ } elseif ($ useUtf8 ) {
117+ preg_match ('/.$/u ' , $ precedingText , $ precedingChar );
118+ $ precedingChar = $ precedingChar [0 ];
119+ } else {
120+ $ precedingChar = substr ($ precedingText , -1 );
121+ }
104122 $ isSeparator = '' !== $ precedingChar && false !== strpos (static ::SEPARATORS , $ precedingChar );
105123
106124 if (is_numeric ($ varName )) {
@@ -110,8 +128,8 @@ private static function compilePattern(Route $route, $pattern, $isHost)
110128 throw new \LogicException (sprintf ('Route pattern "%s" cannot reference variable name "%s" more than once. ' , $ pattern , $ varName ));
111129 }
112130
113- if ($ isSeparator && strlen ( $ precedingText) > 1 ) {
114- $ tokens [] = array ('text ' , substr ($ precedingText , 0 , -1 ));
131+ if ($ isSeparator && $ precedingText !== $ precedingChar ) {
132+ $ tokens [] = array ('text ' , substr ($ precedingText , 0 , -strlen ( $ precedingChar ) ));
115133 } elseif (!$ isSeparator && strlen ($ precedingText ) > 0 ) {
116134 $ tokens [] = array ('text ' , $ precedingText );
117135 }
@@ -126,7 +144,7 @@ private static function compilePattern(Route $route, $pattern, $isHost)
126144 // If {page} would also match the separating dot, {_format} would never match as {page} will eagerly consume everything.
127145 // Also even if {_format} was not optional the requirement prevents that {page} matches something that was originally
128146 // part of {_format} when generating the URL, e.g. _format = 'mobile.html'.
129- $ nextSeparator = self ::findNextSeparator ($ followingPattern );
147+ $ nextSeparator = self ::findNextSeparator ($ followingPattern, $ useUtf8 );
130148 $ regexp = sprintf (
131149 '[^%s%s]+ ' ,
132150 preg_quote ($ defaultSeparator , self ::REGEX_DELIMITER ),
@@ -140,6 +158,16 @@ private static function compilePattern(Route $route, $pattern, $isHost)
140158 // directly adjacent, e.g. '/{x}{y}'.
141159 $ regexp .= '+ ' ;
142160 }
161+ } else {
162+ if (!preg_match ('//u ' , $ regexp )) {
163+ $ useUtf8 = false ;
164+ } elseif (!$ needsUtf8 && preg_match ('/[\x80-\xFF]|(?<! \\\\) \\\\(?: \\\\\\\\)*+(?-i:X|[pP][\{CLMNPSZ]|x\{[A-Fa-f0-9]{3})/ ' , $ regexp )) {
165+ $ needsUtf8 = true ;
166+ @trigger_error (sprintf ('Using UTF-8 route requirements without setting the "utf8" option is deprecated since Symfony 3.2 and will throw a LogicException in 4.0. Turn on the "utf8" route option for pattern "%s". ' , $ pattern ), E_USER_DEPRECATED );
167+ }
168+ if (!$ useUtf8 && $ needsUtf8 ) {
169+ throw new \LogicException (sprintf ('Cannot mix UTF-8 requirement with non-UTF-8 charset for variable "%s" in pattern "%s". ' , $ varName , $ pattern ));
170+ }
143171 }
144172
145173 $ tokens [] = array ('variable ' , $ isSeparator ? $ precedingChar : '' , $ regexp , $ varName );
@@ -168,10 +196,21 @@ private static function compilePattern(Route $route, $pattern, $isHost)
168196 for ($ i = 0 , $ nbToken = count ($ tokens ); $ i < $ nbToken ; ++$ i ) {
169197 $ regexp .= self ::computeRegexp ($ tokens , $ i , $ firstOptional );
170198 }
199+ $ regexp = self ::REGEX_DELIMITER .'^ ' .$ regexp .'$ ' .self ::REGEX_DELIMITER .'s ' .($ isHost ? 'i ' : '' );
200+
201+ // enable Utf8 matching if really required
202+ if ($ needsUtf8 ) {
203+ $ regexp .= 'u ' ;
204+ for ($ i = 0 , $ nbToken = count ($ tokens ); $ i < $ nbToken ; ++$ i ) {
205+ if ('variable ' === $ tokens [$ i ][0 ]) {
206+ $ tokens [$ i ][] = true ;
207+ }
208+ }
209+ }
171210
172211 return array (
173212 'staticPrefix ' => 'text ' === $ tokens [0 ][0 ] ? $ tokens [0 ][1 ] : '' ,
174- 'regex ' => self :: REGEX_DELIMITER . ' ^ ' . $ regexp. ' $ ' . self :: REGEX_DELIMITER . ' s ' .( $ isHost ? ' i ' : '' ) ,
213+ 'regex ' => $ regexp ,
175214 'tokens ' => array_reverse ($ tokens ),
176215 'variables ' => $ variables ,
177216 );
@@ -181,19 +220,25 @@ private static function compilePattern(Route $route, $pattern, $isHost)
181220 * Returns the next static character in the Route pattern that will serve as a separator.
182221 *
183222 * @param string $pattern The route pattern
223+ * @param bool $useUtf8 Whether the character is encoded in UTF-8 or not
184224 *
185225 * @return string The next static character that functions as separator (or empty string when none available)
186226 */
private static function findNextSeparator($pattern, $useUtf8 = false)
{
    if ('' == $pattern) {
        // return empty string if pattern is empty or false (false which can be returned by substr)
        return '';
    }

    // first remove all placeholders from the pattern so we can find the next real static character
    $pattern = preg_replace('#\{\w+\}#', '', $pattern);
    if (null === $pattern || '' === $pattern) {
        // nothing static is left (or preg_replace failed and returned null): there is no separator
        return '';
    }

    if ($useUtf8) {
        // Take the first full UTF-8 character rather than the first byte. The "s" modifier lets "."
        // match a leading newline as well; without it the match fails and leaves no character to read.
        if (!preg_match('/^./us', $pattern, $firstChar)) {
            // no match (0) or PCRE error (false), e.g. malformed UTF-8: treat as "no separator"
            return '';
        }
        $nextChar = $firstChar[0];
    } else {
        // byte mode: the first byte is the next static character
        $nextChar = $pattern[0];
    }

    // only characters listed in SEPARATORS qualify; anything else yields an empty string
    return false !== strpos(static::SEPARATORS, $nextChar) ? $nextChar : '';
}
198243
199244 /**
0 commit comments