@@ -91,6 +91,16 @@ private static function compilePattern(Route $route, $pattern, $isHost)
91
91
$ matches = array ();
92
92
$ pos = 0 ;
93
93
$ defaultSeparator = $ isHost ? '. ' : '/ ' ;
94
+ $ useUtf8 = preg_match ('//u ' , $ pattern );
95
+ $ needsUtf8 = $ route ->getOption ('utf8 ' );
96
+
97
+ if (!$ needsUtf8 && $ useUtf8 && preg_match ('/[\x80-\xFF]/ ' , $ pattern )) {
98
+ $ needsUtf8 = true ;
99
+ @trigger_error (sprintf ('Using UTF-8 route patterns without setting the "utf8" option is deprecated since Symfony 3.2 and will throw a LogicException in 4.0. Turn on the "utf8" route option for pattern "%s". ' , $ pattern ), E_USER_DEPRECATED );
100
+ }
101
+ if (!$ useUtf8 && $ needsUtf8 ) {
102
+ throw new \LogicException (sprintf ('Cannot mix UTF-8 requirements with non-UTF-8 pattern "%s". ' , $ pattern ));
103
+ }
94
104
95
105
// Match all variables enclosed in "{}" and iterate over them. But we only want to match the innermost variable
96
106
// in case of nested "{}", e.g. {foo{bar}}. This is ensured because \w does not match "{" or "}" itself.
@@ -100,7 +110,15 @@ private static function compilePattern(Route $route, $pattern, $isHost)
100
110
// get all static text preceding the current variable
101
111
$ precedingText = substr ($ pattern , $ pos , $ match [0 ][1 ] - $ pos );
102
112
$ pos = $ match [0 ][1 ] + strlen ($ match [0 ][0 ]);
103
- $ precedingChar = strlen ($ precedingText ) > 0 ? substr ($ precedingText , -1 ) : '' ;
113
+
114
+ if (!strlen ($ precedingText )) {
115
+ $ precedingChar = '' ;
116
+ } elseif ($ useUtf8 ) {
117
+ preg_match ('/.$/u ' , $ precedingText , $ precedingChar );
118
+ $ precedingChar = $ precedingChar [0 ];
119
+ } else {
120
+ $ precedingChar = substr ($ precedingText , -1 );
121
+ }
104
122
$ isSeparator = '' !== $ precedingChar && false !== strpos (static ::SEPARATORS , $ precedingChar );
105
123
106
124
if (is_numeric ($ varName )) {
@@ -110,8 +128,8 @@ private static function compilePattern(Route $route, $pattern, $isHost)
110
128
throw new \LogicException (sprintf ('Route pattern "%s" cannot reference variable name "%s" more than once. ' , $ pattern , $ varName ));
111
129
}
112
130
113
- if ($ isSeparator && strlen ( $ precedingText) > 1 ) {
114
- $ tokens [] = array ('text ' , substr ($ precedingText , 0 , -1 ));
131
+ if ($ isSeparator && $ precedingText !== $ precedingChar ) {
132
+ $ tokens [] = array ('text ' , substr ($ precedingText , 0 , -strlen ( $ precedingChar ) ));
115
133
} elseif (!$ isSeparator && strlen ($ precedingText ) > 0 ) {
116
134
$ tokens [] = array ('text ' , $ precedingText );
117
135
}
@@ -126,7 +144,7 @@ private static function compilePattern(Route $route, $pattern, $isHost)
126
144
// If {page} would also match the separating dot, {_format} would never match as {page} will eagerly consume everything.
127
145
// Also even if {_format} was not optional the requirement prevents that {page} matches something that was originally
128
146
// part of {_format} when generating the URL, e.g. _format = 'mobile.html'.
129
- $ nextSeparator = self ::findNextSeparator ($ followingPattern );
147
+ $ nextSeparator = self ::findNextSeparator ($ followingPattern, $ useUtf8 );
130
148
$ regexp = sprintf (
131
149
'[^%s%s]+ ' ,
132
150
preg_quote ($ defaultSeparator , self ::REGEX_DELIMITER ),
@@ -140,6 +158,16 @@ private static function compilePattern(Route $route, $pattern, $isHost)
140
158
// directly adjacent, e.g. '/{x}{y}'.
141
159
$ regexp .= '+ ' ;
142
160
}
161
+ } else {
162
+ if (!preg_match ('//u ' , $ regexp )) {
163
+ $ useUtf8 = false ;
164
+ } elseif (!$ needsUtf8 && preg_match ('/[\x80-\xFF]|(?<! \\\\) \\\\(?: \\\\\\\\)*+(?-i:X|[pP][\{CLMNPSZ]|x\{[A-Fa-f0-9]{3})/ ' , $ regexp )) {
165
+ $ needsUtf8 = true ;
166
+ @trigger_error (sprintf ('Using UTF-8 route requirements without setting the "utf8" option is deprecated since Symfony 3.2 and will throw a LogicException in 4.0. Turn on the "utf8" route option for pattern "%s". ' , $ pattern ), E_USER_DEPRECATED );
167
+ }
168
+ if (!$ useUtf8 && $ needsUtf8 ) {
169
+ throw new \LogicException (sprintf ('Cannot mix UTF-8 requirement with non-UTF-8 charset for variable "%s" in pattern "%s". ' , $ varName , $ pattern ));
170
+ }
143
171
}
144
172
145
173
$ tokens [] = array ('variable ' , $ isSeparator ? $ precedingChar : '' , $ regexp , $ varName );
@@ -168,10 +196,21 @@ private static function compilePattern(Route $route, $pattern, $isHost)
168
196
for ($ i = 0 , $ nbToken = count ($ tokens ); $ i < $ nbToken ; ++$ i ) {
169
197
$ regexp .= self ::computeRegexp ($ tokens , $ i , $ firstOptional );
170
198
}
199
+ $ regexp = self ::REGEX_DELIMITER .'^ ' .$ regexp .'$ ' .self ::REGEX_DELIMITER .'s ' .($ isHost ? 'i ' : '' );
200
+
201
+ // enable Utf8 matching if really required
202
+ if ($ needsUtf8 ) {
203
+ $ regexp .= 'u ' ;
204
+ for ($ i = 0 , $ nbToken = count ($ tokens ); $ i < $ nbToken ; ++$ i ) {
205
+ if ('variable ' === $ tokens [$ i ][0 ]) {
206
+ $ tokens [$ i ][] = true ;
207
+ }
208
+ }
209
+ }
171
210
172
211
return array (
173
212
'staticPrefix ' => 'text ' === $ tokens [0 ][0 ] ? $ tokens [0 ][1 ] : '' ,
174
- 'regex ' => self :: REGEX_DELIMITER . ' ^ ' . $ regexp. ' $ ' . self :: REGEX_DELIMITER . ' s ' .( $ isHost ? ' i ' : '' ) ,
213
+ 'regex ' => $ regexp ,
175
214
'tokens ' => array_reverse ($ tokens ),
176
215
'variables ' => $ variables ,
177
216
);
@@ -181,19 +220,25 @@ private static function compilePattern(Route $route, $pattern, $isHost)
181
220
* Returns the next static character in the Route pattern that will serve as a separator.
182
221
*
183
222
* @param string $pattern The route pattern
223
+ * @param bool $useUtf8 Whether the character is encoded in UTF-8 or not
184
224
*
185
225
* @return string The next static character that functions as separator (or empty string when none available)
186
226
*/
187
private static function findNextSeparator($pattern, $useUtf8)
{
    // Loose comparison on purpose: the caller may pass false (a possible return
    // value of substr) as well as an empty string; both mean "nothing follows".
    if ('' == $pattern) {
        return '';
    }

    // Drop every "{placeholder}" so that only static text remains; the first
    // remaining character is the candidate separator.
    $staticText = preg_replace('#\{\w+\}#', '', $pattern);
    if (null === $staticText || '' === $staticText) {
        // null: PCRE failure; '': the pattern consisted of placeholders only.
        return '';
    }

    if ($useUtf8) {
        // Take the first full UTF-8 character instead of the first byte. The "s"
        // modifier lets "." match a newline as well, and a dedicated match array
        // is used so a failed match can never leave us indexing an empty array.
        if (!preg_match('/^./us', $staticText, $firstCharMatch)) {
            return '';
        }
        $firstChar = $firstCharMatch[0];
    } else {
        // Byte-oriented mode: the first byte is the first character.
        $firstChar = $staticText[0];
    }

    // Only characters listed in SEPARATORS qualify; anything else means
    // "no separator available", signalled by an empty string.
    return false !== strpos(static::SEPARATORS, $firstChar) ? $firstChar : '';
}
198
243
199
244
/**
0 commit comments