20
20
class RouteCompiler implements RouteCompilerInterface
21
21
{
22
22
const REGEX_DELIMITER = '# ' ;
23
+ const HOST_SEPARATOR = '. ' ;
24
+ const PATH_SEPARATOR = '/ ' ;
23
25
24
26
/**
25
27
* This string defines the characters that are automatically considered separators in front of
@@ -39,15 +41,13 @@ public static function compile(Route $route)
39
41
{
40
42
$ staticPrefix = null ;
41
43
$ hostVariables = array ();
42
- $ pathVariables = array ();
43
44
$ variables = array ();
44
- $ tokens = array ();
45
45
$ regex = null ;
46
46
$ hostRegex = null ;
47
47
$ hostTokens = array ();
48
48
49
49
if ('' !== $ host = $ route ->getHost ()) {
50
- $ result = self ::compilePattern ($ route , $ host, true );
50
+ $ result = self ::compileHostPattern ($ route , $ host );
51
51
52
52
$ hostVariables = $ result ['variables ' ];
53
53
$ variables = array_merge ($ variables , $ hostVariables );
@@ -58,7 +58,7 @@ public static function compile(Route $route)
58
58
59
59
$ path = $ route ->getPath ();
60
60
61
- $ result = self ::compilePattern ($ route , $ path, false );
61
+ $ result = self ::compilePathPattern ($ route , $ path );
62
62
63
63
$ staticPrefix = $ result ['staticPrefix ' ];
64
64
@@ -80,31 +80,115 @@ public static function compile(Route $route)
80
80
);
81
81
}
82
82
83
- private static function compilePattern (Route $ route , $ pattern , $ isHost )
83
/**
 * Compiles the host pattern of a route.
 *
 * The pattern is walked from its last variable back to its first one, so $tokens and
 * $variables are collected in reverse order and flipped before they are returned.
 *
 * @param Route  $route   The route whose requirements and defaults apply to the variables
 * @param string $pattern The host pattern to compile
 *
 * @return array An array with the keys "staticPrefix", "regex", "tokens" and "variables"
 *
 * @throws \LogicException  When a variable name is referenced more than once (raised by validateVariable)
 * @throws \DomainException When a variable name is numeric (raised by validateVariable)
 */
private static function compileHostPattern(Route $route, $pattern)
{
    $tokens = array();
    $variables = array();
    // $pos marks the start of the part of the pattern that has already been consumed (scanning right to left).
    $pos = strlen($pattern);

    foreach (array_reverse(self::getVariableMatches($pattern)) as $match) {
        $varName = substr($match[0][0], 1, -1);
        // get all static text following the current variable, up to the previously handled variable
        $variableEnd = $match[0][1] + strlen($match[0][0]);
        $sequentText = substr($pattern, $variableEnd, $pos - $variableEnd);
        $pos = $match[0][1];
        $sequentChar = strlen($sequentText) > 0 ? substr($sequentText, 0, 1) : '';
        $isSeparator = '' !== $sequentChar && false !== strpos(static::SEPARATORS, $sequentChar);

        self::validateVariable($pattern, $varName, $variables);

        // The separator directly after the variable is stored with the variable token itself,
        // so only the remaining static text becomes a text token.
        if ($isSeparator && strlen($sequentText) > 1) {
            $tokens[] = array('text', substr($sequentText, 1));
        } elseif (!$isSeparator && strlen($sequentText) > 0) {
            $tokens[] = array('text', $sequentText);
        }

        $regexp = $route->getRequirement($varName);
        if (null === $regexp) {
            $precedingPattern = (string) substr($pattern, 0, $pos);
            // Without an explicit requirement the variable may match anything except the host separator
            // and the separator character found in front of it (if that one differs from the host separator).
            $previousSeparator = self::findPreviousSeparator($precedingPattern);
            $regexp = sprintf(
                '[^%s%s]+',
                self::HOST_SEPARATOR !== $previousSeparator && '' !== $previousSeparator ? preg_quote($previousSeparator, self::REGEX_DELIMITER) : '',
                preg_quote(self::HOST_SEPARATOR, self::REGEX_DELIMITER)
            );

            if (('' !== $previousSeparator && !preg_match('#\{\w+\}$#', $precedingPattern)) || '' === $precedingPattern) {
                // The variable is bounded by a separator it cannot match (or starts the pattern), so a
                // possessive quantifier avoids useless backtracking. This is not applied when another
                // variable is directly adjacent in front of this one, e.g. '{x}{y}'.
                $regexp .= '+';
            }
        }

        $tokens[] = array('variable', $isSeparator ? $sequentChar : '', $regexp, $varName);
        $variables[] = $varName;
    }

    // whatever is left in front of the first variable is static text
    if ($pos > 0) {
        $tokens[] = array('text', substr($pattern, 0, $pos));
    }

    $firstOptional = self::getFirstOptionalKey($route, $tokens);

    // compute the matching regexp; tokens are in reverse order, so each piece is prepended
    $regexp = '';
    for ($i = 0, $nbToken = count($tokens); $i < $nbToken; $i++) {
        $regexp = self::computeHostRegexp($tokens, $i, $firstOptional).$regexp;
    }

    return array(
        // NOTE(review): $tokens[0] is the LAST token of the pattern here (reverse order), so this value
        // is the trailing static text of the host - confirm this is what callers expect.
        // The isset() guard keeps an empty pattern from triggering an undefined offset.
        'staticPrefix' => isset($tokens[0]) && 'text' === $tokens[0][0] ? $tokens[0][1] : '',
        'regex' => self::REGEX_DELIMITER.'^'.$regexp.'$'.self::REGEX_DELIMITER.'s',
        'tokens' => array_reverse($tokens),
        'variables' => array_reverse($variables),
    );
}
166
+
167
+ /**
168
+ * Compile route path pattern
169
+ *
170
+ * @param Route $route
171
+ * @param string $pattern
172
+ *
173
+ * @return array
174
+ * @throws \LogicException
175
+ * @throws \DomainException
176
+ */
177
+ private static function compilePathPattern (Route $ route , $ pattern )
84
178
{
85
179
$ tokens = array ();
86
180
$ variables = array ();
87
- $ matches = array ();
88
181
$ pos = 0 ;
89
- $ defaultSeparator = $ isHost ? '. ' : '/ ' ;
90
182
91
- // Match all variables enclosed in "{}" and iterate over them. But we only want to match the innermost variable
92
- // in case of nested "{}", e.g. {foo{bar}}. This in ensured because \w does not match "{" or "}" itself.
93
- preg_match_all ('#\{\w+\}# ' , $ pattern , $ matches , PREG_OFFSET_CAPTURE | PREG_SET_ORDER );
94
- foreach ($ matches as $ match ) {
183
+ foreach (self ::getVariableMatches ($ pattern ) as $ match ) {
95
184
$ varName = substr ($ match [0 ][0 ], 1 , -1 );
96
185
// get all static text preceding the current variable
97
186
$ precedingText = substr ($ pattern , $ pos , $ match [0 ][1 ] - $ pos );
98
187
$ pos = $ match [0 ][1 ] + strlen ($ match [0 ][0 ]);
99
188
$ precedingChar = strlen ($ precedingText ) > 0 ? substr ($ precedingText , -1 ) : '' ;
100
189
$ isSeparator = '' !== $ precedingChar && false !== strpos (static ::SEPARATORS , $ precedingChar );
101
190
102
- if (is_numeric ($ varName )) {
103
- throw new \DomainException (sprintf ('Variable name "%s" cannot be numeric in route pattern "%s". Please use a different name. ' , $ varName , $ pattern ));
104
- }
105
- if (in_array ($ varName , $ variables )) {
106
- throw new \LogicException (sprintf ('Route pattern "%s" cannot reference variable name "%s" more than once. ' , $ pattern , $ varName ));
107
- }
191
+ self ::validateVariable ($ pattern , $ varName , $ variables );
108
192
109
193
if ($ isSeparator && strlen ($ precedingText ) > 1 ) {
110
194
$ tokens [] = array ('text ' , substr ($ precedingText , 0 , -1 ));
@@ -125,8 +209,8 @@ private static function compilePattern(Route $route, $pattern, $isHost)
125
209
$ nextSeparator = self ::findNextSeparator ($ followingPattern );
126
210
$ regexp = sprintf (
127
211
'[^%s%s]+ ' ,
128
- preg_quote ($ defaultSeparator , self ::REGEX_DELIMITER ),
129
- $ defaultSeparator !== $ nextSeparator && '' !== $ nextSeparator ? preg_quote ($ nextSeparator , self ::REGEX_DELIMITER ) : ''
212
+ preg_quote (self :: PATH_SEPARATOR , self ::REGEX_DELIMITER ),
213
+ self :: PATH_SEPARATOR !== $ nextSeparator && '' !== $ nextSeparator ? preg_quote ($ nextSeparator , self ::REGEX_DELIMITER ) : ''
130
214
);
131
215
if (('' !== $ nextSeparator && !preg_match ('#^\{\w+\}# ' , $ followingPattern )) || '' === $ followingPattern ) {
132
216
// When we have a separator, which is disallowed for the variable, we can optimize the regex with a possessive
@@ -146,23 +230,12 @@ private static function compilePattern(Route $route, $pattern, $isHost)
146
230
$ tokens [] = array ('text ' , substr ($ pattern , $ pos ));
147
231
}
148
232
149
- // find the first optional token
150
- $ firstOptional = PHP_INT_MAX ;
151
- if (!$ isHost ) {
152
- for ($ i = count ($ tokens ) - 1 ; $ i >= 0 ; $ i --) {
153
- $ token = $ tokens [$ i ];
154
- if ('variable ' === $ token [0 ] && $ route ->hasDefault ($ token [3 ])) {
155
- $ firstOptional = $ i ;
156
- } else {
157
- break ;
158
- }
159
- }
160
- }
233
+ $ firstOptional = self ::getFirstOptionalKey ($ route , $ tokens );
161
234
162
235
// compute the matching regexp
163
236
$ regexp = '' ;
164
237
for ($ i = 0 , $ nbToken = count ($ tokens ); $ i < $ nbToken ; $ i ++) {
165
- $ regexp .= self ::computeRegexp ($ tokens , $ i , $ firstOptional );
238
+ $ regexp .= self ::computePathRegexp ($ tokens , $ i , $ firstOptional, false );
166
239
}
167
240
168
241
return array (
@@ -173,6 +246,52 @@ private static function compilePattern(Route $route, $pattern, $isHost)
173
246
);
174
247
}
175
248
249
/**
 * Validates a variable found in a route pattern.
 *
 * @param string $pattern   Route pattern (only used in the exception messages)
 * @param string $varName   Name of the variable to validate
 * @param array  $variables Names of the variables already used in the pattern
 *
 * @return bool Always true when the variable is valid
 *
 * @throws \DomainException When the variable name is numeric
 * @throws \LogicException  When the variable name was already used in the pattern
 */
private static function validateVariable($pattern, $varName, $variables)
{
    if (is_numeric($varName)) {
        throw new \DomainException(sprintf('Variable name "%s" cannot be numeric in route pattern "%s". Please use a different name.', $varName, $pattern));
    }
    // Strict comparison: names are compared as strings, without type juggling.
    if (in_array($varName, $variables, true)) {
        throw new \LogicException(sprintf('Route pattern "%s" cannot reference variable name "%s" more than once.', $pattern, $varName));
    }

    return true;
}
271
+
272
/**
 * Returns the index at which the trailing run of optional tokens starts.
 *
 * A token counts as optional when it is a variable token for which the route has a default.
 * The token list is inspected from its end; the scan stops at the first token that is not optional.
 *
 * @param Route $route  The route that is asked for defaults
 * @param array $tokens The tokens to inspect
 *
 * @return int The lowest index of the trailing optional run, or PHP_INT_MAX when the last token is not optional
 */
private static function getFirstOptionalKey(Route $route, array $tokens)
{
    $total = count($tokens);
    $start = $total;

    while ($start > 0) {
        $candidate = $tokens[$start - 1];
        if ('variable' !== $candidate[0] || !$route->hasDefault($candidate[3])) {
            break;
        }
        --$start;
    }

    // nothing optional at the end of the list (this also covers an empty list)
    return $start < $total ? $start : PHP_INT_MAX;
}
294
+
176
295
/**
177
296
* Returns the next static character in the Route pattern that will serve as a separator.
178
297
*
@@ -193,15 +312,35 @@ private static function findNextSeparator($pattern)
193
312
}
194
313
195
314
/**
 * Returns the static character at the end of the given pattern part when it is a separator.
 *
 * Only the last placeholder of the pattern is stripped before the final character is inspected.
 *
 * @param string $pattern The part of the route pattern to inspect
 *
 * @return string The trailing character if it is one of the SEPARATORS (or empty string when none available)
 */
private static function findPreviousSeparator($pattern)
{
    // loose comparison on purpose: also covers false, which substr can return
    if ('' == $pattern) {
        return '';
    }

    // drop the placeholder that is not followed by any other placeholder, i.e. the last one
    $stripped = preg_replace('#\{\w+\}(?!.*\{\w+\})#', '', $pattern);
    $lastChar = substr($stripped, -1);

    if (empty($lastChar) || false === strpos(static::SEPARATORS, $lastChar)) {
        return '';
    }

    return $lastChar;
}
333
+
334
+ /**
335
+ * Computes the regexp used to match a specific path token. It can be static text or a subpattern.
197
336
*
198
337
* @param array $tokens The route tokens
199
338
* @param integer $index The index of the current token
200
339
* @param integer $firstOptional The index of the first optional token
201
340
*
202
- * @return string The regexp pattern for a single token
341
+ * @return string The path regexp pattern for a single token
203
342
*/
204
- private static function computeRegexp (array $ tokens , $ index , $ firstOptional )
343
+ private static function computePathRegexp (array $ tokens , $ index , $ firstOptional )
205
344
{
206
345
$ token = $ tokens [$ index ];
207
346
if ('text ' === $ token [0 ]) {
@@ -230,4 +369,59 @@ private static function computeRegexp(array $tokens, $index, $firstOptional)
230
369
}
231
370
}
232
371
}
372
+
373
/**
 * Computes the regexp used to match a specific host token. It can be static text or a subpattern.
 *
 * @param array   $tokens        The route tokens
 * @param integer $index         The index of the current token
 * @param integer $firstOptional The index of the first optional token
 *
 * @return string The host regexp pattern for a single token
 */
private static function computeHostRegexp(array $tokens, $index, $firstOptional)
{
    $token = $tokens[$index];

    // Text tokens are matched literally
    if ('text' === $token[0]) {
        return preg_quote($token[1], self::REGEX_DELIMITER);
    }

    // Variable tokens: a named group followed by the separator stored with the token
    $capture = sprintf('(?P<%s>%s)', $token[3], $token[2]);
    $separator = preg_quote($token[1], self::REGEX_DELIMITER);

    if (0 === $index && 0 === $firstOptional) {
        // Token 0 is optional: a "?" is appended right after the (possibly empty) separator
        return $capture.$separator.'?';
    }

    $regexp = $capture.$separator;
    if ($index < $firstOptional) {
        // required variable
        return $regexp;
    }

    // Optional token: close its optional subpattern here ...
    $regexp .= ')?';
    $nbTokens = count($tokens);
    if ($nbTokens - 1 == $index) {
        // ... and open all the non-capturing "(?:" groups in front of the last token of the list
        $regexp = str_repeat('(?:', $nbTokens - $firstOptional - (0 === $firstOptional ? 1 : 0)).$regexp;
    }

    return $regexp;
}
411
+
412
/**
 * Collects every "{name}" placeholder of a pattern together with its byte offset.
 *
 * @param string $pattern The pattern to scan
 *
 * @return array The matches in PREG_SET_ORDER, each entry captured with PREG_OFFSET_CAPTURE
 */
private static function getVariableMatches($pattern)
{
    $placeholders = array();

    // Only the innermost variable is matched in case of nested "{}", e.g. {foo{bar}}.
    // This is ensured because \w does not match "{" or "}" itself.
    preg_match_all('#\{\w+\}#', $pattern, $placeholders, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);

    return $placeholders;
}
233
427
}
0 commit comments