@@ -91,6 +91,7 @@ private static function compilePattern(Route $route, $pattern, $isHost)
91
91
$ matches = array ();
92
92
$ pos = 0 ;
93
93
$ defaultSeparator = $ isHost ? '. ' : '/ ' ;
94
+ $ useUnicode = preg_match ('//u ' , $ pattern );
94
95
95
96
// Match all variables enclosed in "{}" and iterate over them. But we only want to match the innermost variable
96
97
// in case of nested "{}", e.g. {foo{bar}}. This is ensured because \w does not match "{" or "}" itself.
@@ -100,7 +101,15 @@ private static function compilePattern(Route $route, $pattern, $isHost)
100
101
// get all static text preceding the current variable
101
102
$ precedingText = substr ($ pattern , $ pos , $ match [0 ][1 ] - $ pos );
102
103
$ pos = $ match [0 ][1 ] + strlen ($ match [0 ][0 ]);
103
- $ precedingChar = strlen ($ precedingText ) > 0 ? substr ($ precedingText , -1 ) : '' ;
104
+
105
+ if (!strlen ($ precedingText )) {
106
+ $ precedingChar = '' ;
107
+ } elseif ($ useUnicode ) {
108
+ preg_match ('/.$/u ' , $ precedingText , $ precedingChar );
109
+ $ precedingChar = $ precedingChar [0 ];
110
+ } else {
111
+ $ precedingChar = substr ($ precedingText , -1 );
112
+ }
104
113
$ isSeparator = '' !== $ precedingChar && false !== strpos (static ::SEPARATORS , $ precedingChar );
105
114
106
115
if (is_numeric ($ varName )) {
@@ -110,8 +119,8 @@ private static function compilePattern(Route $route, $pattern, $isHost)
110
119
throw new \LogicException (sprintf ('Route pattern "%s" cannot reference variable name "%s" more than once. ' , $ pattern , $ varName ));
111
120
}
112
121
113
- if ($ isSeparator && strlen ( $ precedingText) > 1 ) {
114
- $ tokens [] = array ('text ' , substr ($ precedingText , 0 , -1 ));
122
+ if ($ isSeparator && $ precedingText !== $ precedingChar ) {
123
+ $ tokens [] = array ('text ' , substr ($ precedingText , 0 , -strlen ( $ precedingChar ) ));
115
124
} elseif (!$ isSeparator && strlen ($ precedingText ) > 0 ) {
116
125
$ tokens [] = array ('text ' , $ precedingText );
117
126
}
@@ -126,7 +135,7 @@ private static function compilePattern(Route $route, $pattern, $isHost)
126
135
// If {page} would also match the separating dot, {_format} would never match as {page} will eagerly consume everything.
127
136
// Also even if {_format} was not optional the requirement prevents that {page} matches something that was originally
128
137
// part of {_format} when generating the URL, e.g. _format = 'mobile.html'.
129
- $ nextSeparator = self ::findNextSeparator ($ followingPattern );
138
+ $ nextSeparator = self ::findNextSeparator ($ followingPattern, $ useUnicode );
130
139
$ regexp = sprintf (
131
140
'[^%s%s]+ ' ,
132
141
preg_quote ($ defaultSeparator , self ::REGEX_DELIMITER ),
@@ -140,6 +149,8 @@ private static function compilePattern(Route $route, $pattern, $isHost)
140
149
// directly adjacent, e.g. '/{x}{y}'.
141
150
$ regexp .= '+ ' ;
142
151
}
152
+ } elseif (!preg_match ('//u ' , $ regexp )) {
153
+ $ useUnicode = false ;
143
154
}
144
155
145
156
$ tokens [] = array ('variable ' , $ isSeparator ? $ precedingChar : '' , $ regexp , $ varName );
@@ -168,10 +179,21 @@ private static function compilePattern(Route $route, $pattern, $isHost)
168
179
for ($ i = 0 , $ nbToken = count ($ tokens ); $ i < $ nbToken ; ++$ i ) {
169
180
$ regexp .= self ::computeRegexp ($ tokens , $ i , $ firstOptional );
170
181
}
182
+ $ regexp = self ::REGEX_DELIMITER .'^ ' .$ regexp .'$ ' .self ::REGEX_DELIMITER .'s ' .($ isHost ? 'i ' : '' );
183
+
184
+ // don't enable Unicode matching if not really required
185
+ if ($ useUnicode && preg_match ('/[\x80-\xFF]|(?:[^ \\\\] \\\\(?: \\\\\\\\)*+(?-i:X|[pP][\{A-Za-z]|x[\{A-Fa-f0-9]))/ ' , $ regexp )) {
186
+ $ regexp .= 'u ' ;
187
+ for ($ i = count ($ tokens ) - 1 ; $ i >= 0 ; --$ i ) {
188
+ if ('variable ' === $ tokens [$ i ][0 ]) {
189
+ $ tokens [$ i ][] = true ;
190
+ }
191
+ }
192
+ }
171
193
172
194
return array (
173
195
'staticPrefix ' => 'text ' === $ tokens [0 ][0 ] ? $ tokens [0 ][1 ] : '' ,
174
- 'regex ' => self :: REGEX_DELIMITER . ' ^ ' . $ regexp. ' $ ' . self :: REGEX_DELIMITER . ' s ' .( $ isHost ? ' i ' : '' ) ,
196
+ 'regex ' => $ regexp ,
175
197
'tokens ' => array_reverse ($ tokens ),
176
198
'variables ' => $ variables ,
177
199
);
@@ -180,20 +202,26 @@ private static function compilePattern(Route $route, $pattern, $isHost)
180
202
/**
181
203
* Returns the next static character in the Route pattern that will serve as a separator.
182
204
*
183
- * @param string $pattern The route pattern
205
+ * @param string $pattern The route pattern
206
+ * @param bool $useUnicode Whether the pattern is handled as UTF-8, so that its first character is extracted as a whole (possibly multibyte) character
184
207
*
185
208
* @return string The next static character that functions as separator (or empty string when none available)
186
209
*/
187
- private static function findNextSeparator ($ pattern )
210
+ private static function findNextSeparator ($ pattern, $ useUnicode )
188
211
{
189
212
if ('' == $ pattern ) {
190
213
// return empty string if pattern is empty or false (false which can be returned by substr)
191
214
return '' ;
192
215
}
193
216
// first remove all placeholders from the pattern so we can find the next real static character
194
- $ pattern = preg_replace ('#\{\w+\}# ' , '' , $ pattern );
217
+ if ('' === $ pattern = preg_replace ('#\{\w+\}# ' , '' , $ pattern )) {
218
+ return '' ;
219
+ }
220
+ if ($ useUnicode ) {
221
+ preg_match ('/^./u ' , $ pattern , $ pattern );
222
+ }
195
223
196
- return isset ( $ pattern [ 0 ]) && false !== strpos (static ::SEPARATORS , $ pattern [0 ]) ? $ pattern [0 ] : '' ;
224
+ return false !== strpos (static ::SEPARATORS , $ pattern [0 ]) ? $ pattern [0 ] : '' ;
197
225
}
198
226
199
227
/**
0 commit comments