@@ -91,6 +91,8 @@ private static function compilePattern(Route $route, $pattern, $isHost)
91
91
$ matches = array ();
92
92
$ pos = 0 ;
93
93
$ defaultSeparator = $ isHost ? '. ' : '/ ' ;
94
+ $ useUnicode = preg_match ('//u ' , $ pattern );
95
+ $ needsUnicode = $ useUnicode && preg_match ('/[\x80-\xFF]/ ' , $ pattern );
94
96
95
97
// Match all variables enclosed in "{}" and iterate over them. But we only want to match the innermost variable
96
98
// in case of nested "{}", e.g. {foo{bar}}. This is ensured because \w does not match "{" or "}" itself.
@@ -100,7 +102,15 @@ private static function compilePattern(Route $route, $pattern, $isHost)
100
102
// get all static text preceding the current variable
101
103
$ precedingText = substr ($ pattern , $ pos , $ match [0 ][1 ] - $ pos );
102
104
$ pos = $ match [0 ][1 ] + strlen ($ match [0 ][0 ]);
103
- $ precedingChar = strlen ($ precedingText ) > 0 ? substr ($ precedingText , -1 ) : '' ;
105
+
106
+ if (!strlen ($ precedingText )) {
107
+ $ precedingChar = '' ;
108
+ } elseif ($ useUnicode ) {
109
+ preg_match ('/.$/u ' , $ precedingText , $ precedingChar );
110
+ $ precedingChar = $ precedingChar [0 ];
111
+ } else {
112
+ $ precedingChar = substr ($ precedingText , -1 );
113
+ }
104
114
$ isSeparator = '' !== $ precedingChar && false !== strpos (static ::SEPARATORS , $ precedingChar );
105
115
106
116
if (is_numeric ($ varName )) {
@@ -110,8 +120,8 @@ private static function compilePattern(Route $route, $pattern, $isHost)
110
120
throw new \LogicException (sprintf ('Route pattern "%s" cannot reference variable name "%s" more than once. ' , $ pattern , $ varName ));
111
121
}
112
122
113
- if ($ isSeparator && strlen ( $ precedingText) > 1 ) {
114
- $ tokens [] = array ('text ' , substr ($ precedingText , 0 , -1 ));
123
+ if ($ isSeparator && $ precedingText !== $ precedingChar ) {
124
+ $ tokens [] = array ('text ' , substr ($ precedingText , 0 , -strlen ( $ precedingChar ) ));
115
125
} elseif (!$ isSeparator && strlen ($ precedingText ) > 0 ) {
116
126
$ tokens [] = array ('text ' , $ precedingText );
117
127
}
@@ -126,7 +136,7 @@ private static function compilePattern(Route $route, $pattern, $isHost)
126
136
// If {page} would also match the separating dot, {_format} would never match as {page} will eagerly consume everything.
127
137
// Also even if {_format} was not optional the requirement prevents that {page} matches something that was originally
128
138
// part of {_format} when generating the URL, e.g. _format = 'mobile.html'.
129
- $ nextSeparator = self ::findNextSeparator ($ followingPattern );
139
+ $ nextSeparator = self ::findNextSeparator ($ followingPattern, $ useUnicode );
130
140
$ regexp = sprintf (
131
141
'[^%s%s]+ ' ,
132
142
preg_quote ($ defaultSeparator , self ::REGEX_DELIMITER ),
@@ -140,6 +150,19 @@ private static function compilePattern(Route $route, $pattern, $isHost)
140
150
// directly adjacent, e.g. '/{x}{y}'.
141
151
$ regexp .= '+ ' ;
142
152
}
153
+ } else {
154
+ if (0 === strpos ($ regexp , '* ' )) {
155
+ $ needsUnicode = true ;
156
+ $ regexp = substr ($ regexp , 1 );
157
+ }
158
+ if (!preg_match ('//u ' , $ regexp )) {
159
+ $ useUnicode = false ;
160
+ } elseif (!$ needsUnicode && preg_match ('/[\x80-\xFF]|(?<! \\\\) \\\\(?: \\\\\\\\)*+(?-i:X|[pP][\{CLMNPSZ]|x\{)/ ' , $ regexp )) {
161
+ $ needsUnicode = true ;
162
+ }
163
+ if (!$ useUnicode && $ needsUnicode ) {
164
+ throw new \LogicException (sprintf ('Cannot mix unicode requirement with non-UTF-8 charset for variable "%s" in pattern "%s". ' , $ varName , $ pattern ));
165
+ }
143
166
}
144
167
145
168
$ tokens [] = array ('variable ' , $ isSeparator ? $ precedingChar : '' , $ regexp , $ varName );
@@ -168,10 +191,21 @@ private static function compilePattern(Route $route, $pattern, $isHost)
168
191
for ($ i = 0 , $ nbToken = count ($ tokens ); $ i < $ nbToken ; ++$ i ) {
169
192
$ regexp .= self ::computeRegexp ($ tokens , $ i , $ firstOptional );
170
193
}
194
+ $ regexp = self ::REGEX_DELIMITER .'^ ' .$ regexp .'$ ' .self ::REGEX_DELIMITER .'s ' .($ isHost ? 'i ' : '' );
195
+
196
+ // enable Unicode matching if really required
197
+ if ($ needsUnicode ) {
198
+ $ regexp .= 'u ' ;
199
+ for ($ i = count ($ tokens ) - 1 ; $ i >= 0 ; --$ i ) {
200
+ if ('variable ' === $ tokens [$ i ][0 ]) {
201
+ $ tokens [$ i ][] = true ;
202
+ }
203
+ }
204
+ }
171
205
172
206
return array (
173
207
'staticPrefix ' => 'text ' === $ tokens [0 ][0 ] ? $ tokens [0 ][1 ] : '' ,
174
- 'regex ' => self :: REGEX_DELIMITER . ' ^ ' . $ regexp. ' $ ' . self :: REGEX_DELIMITER . ' s ' .( $ isHost ? ' i ' : '' ) ,
208
+ 'regex ' => $ regexp ,
175
209
'tokens ' => array_reverse ($ tokens ),
176
210
'variables ' => $ variables ,
177
211
);
@@ -180,20 +214,26 @@ private static function compilePattern(Route $route, $pattern, $isHost)
180
214
/**
 * Returns the next static character in the Route pattern that will serve as a separator.
 *
 * Placeholders ("{name}") are stripped first, so the character that gets examined is the
 * first literal character left in the pattern.
 *
 * @param string $pattern    The route pattern
 * @param bool   $useUnicode Whether the pattern is read as UTF-8, in which case the first
 *                           character may span several bytes
 *
 * @return string The next static character that functions as separator (or empty string when none available)
 */
private static function findNextSeparator($pattern, $useUnicode)
{
    if ('' == $pattern) {
        // return empty string if pattern is empty or false (false which can be returned by substr)
        return '';
    }

    // first remove all placeholders from the pattern so we can find the next real static character
    $pattern = preg_replace('#\{\w+\}#', '', $pattern);
    if (null === $pattern || '' === $pattern) {
        // nothing but placeholders was left, or preg_replace() reported an error
        return '';
    }

    // byte-wise first character; replaced below by the full UTF-8 character when requested
    $nextChar = $pattern[0];

    if ($useUnicode) {
        // The "s" modifier lets "." match a leading newline as well. The match result is kept
        // in its own variable so a failed match cannot turn into an undefined-offset read.
        if (!preg_match('/^./us', $pattern, $firstChar)) {
            return '';
        }
        $nextChar = $firstChar[0];
    }

    return false !== strpos(static::SEPARATORS, $nextChar) ? $nextChar : '';
}
198
238
199
239
/**
0 commit comments