[Routing] Add seamless support for unicode requirements · symfony/symfony@de3a063 · GitHub
[go: up one dir, main page]

Skip to content

Commit de3a063

Browse files
[Routing] Add seamless support for unicode requirements
1 parent 904279e commit de3a063

File tree

6 files changed

+129
-13
lines changed
10000

6 files changed

+129
-13
lines changed

src/Symfony/Component/Routing/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ CHANGELOG
55
-----
66

77
* Added support for `bool`, `int`, `float`, `string`, `list` and `map` defaults in XML configurations.
8+
* Added support for unicode requirements
89

910
2.8.0
1011
-----

src/Symfony/Component/Routing/Generator/UrlGenerator.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ protected function doGenerate($variables, $defaults, $requirements, $tokens, $pa
158158
if ('variable' === $token[0]) {
159159
if (!$optional || !array_key_exists($token[3], $defaults) || null !== $mergedParams[$token[3]] && (string) $mergedParams[$token[3]] !== (string) $defaults[$token[3]]) {
160160
// check requirement
161-
if (null !== $this->strictRequirements && !preg_match('#^'.$token[2].'$#', $mergedParams[$token[3]])) {
161+
if (null !== $this->strictRequirements && !preg_match('#^'.$token[2].'$#'.(empty($token[4]) ? '' : 'u'), $mergedParams[$token[3]])) {
162162
if ($this->strictRequirements) {
163163
throw new InvalidParameterException(strtr($message, array('{parameter}' => $token[3], '{route}' => $name, '{expected}' => $token[2], '{given}' => $mergedParams[$token[3]])));
164164
}
@@ -212,7 +212,7 @@ protected function doGenerate($variables, $defaults, $requirements, $tokens, $pa
212212
$routeHost = '';
213213
foreach ($hostTokens as $token) {
214214
if ('variable' === $token[0]) {
215-
if (null !== $this->strictRequirements && !preg_match('#^'.$token[2].'$#i', $mergedParams[$token[3]])) {
215+
if (null !== $this->strictRequirements && !preg_match('#^'.$token[2].'$#i'.(empty($token[4]) ? '' : 'u'), $mergedParams[$token[3]])) {
216216
if ($this->strictRequirements) {
217217
throw new InvalidParameterException(strtr($message, array('{parameter}' => $token[3], '{route}' => $name, '{expected}' => $token[2], '{given}' => $mergedParams[$token[3]])));
218218
}

src/Symfony/Component/Routing/Matcher/Dumper/PhpMatcherDumper.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -223,8 +223,9 @@ private function compileRoute(Route $route, $name, $supportsRedirections, $paren
223223
}
224224

225225
$supportsTrailingSlash = $supportsRedirections && (!$methods || in_array('HEAD', $methods));
226+
$regex = $compiledRoute->getRegex();
226227

227-
if (!count($compiledRoute->getPathVariables()) && false !== preg_match('#^(.)\^(?P<url>.*?)\$\1#', $compiledRoute->getRegex(), $m)) {
228+
if (!count($compiledRoute->getPathVariables()) && false !== preg_match('#^(.)\^(?P<url>.*?)\$\1#'.(substr($regex, -1) === 'u' ? 'u' : ''), $regex, $m)) {
228229
if ($supportsTrailingSlash && substr($m['url'], -1) === '/') {
229230
$conditions[] = sprintf("rtrim(\$pathinfo, '/') === %s", var_export(rtrim(str_replace('\\', '', $m['url']), '/'), true));
230231
$hasTrailingSlash = true;
@@ -236,7 +237,6 @@ private function compileRoute(Route $route, $name, $supportsRedirections, $paren
236237
$conditions[] = sprintf('0 === strpos($pathinfo, %s)', var_export($compiledRoute->getStaticPrefix(), true));
237238
}
238239

239-
$regex = $compiledRoute->getRegex();
240240
if ($supportsTrailingSlash && $pos = strpos($regex, '/$')) {
241241
$regex = substr($regex, 0, $pos).'/?$'.substr($regex, $pos + 2);
242242
$hasTrailingSlash = true;

src/Symfony/Component/Routing/RouteCompiler.php

Lines changed: 49 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ private static function compilePattern(Route $route, $pattern, $isHost)
9191
$matches = array();
9292
$pos = 0;
9393
$defaultSeparator = $isHost ? '.' : '/';
94+
$useUnicode = preg_match('//u', $pattern);
95+
$needsUnicode = $useUnicode && preg_match('/[\x80-\xFF]/', $pattern);
9496

9597
// Match all variables enclosed in "{}" and iterate over them. But we only want to match the innermost variable
9698
// in case of nested "{}", e.g. {foo{bar}}. This is ensured because \w does not match "{" or "}" itself.
@@ -100,7 +102,15 @@ private static function compilePattern(Route $route, $pattern, $isHost)
100102
// get all static text preceding the current variable
101103
$precedingText = substr($pattern, $pos, $match[0][1] - $pos);
102104
$pos = $match[0][1] + strlen($match[0][0]);
103-
$precedingChar = strlen($precedingText) > 0 ? substr($precedingText, -1) : '';
105+
106+
if (!strlen($precedingText)) {
107+
$precedingChar = '';
108+
} elseif ($useUnicode) {
109+
preg_match('/.$/u', $precedingText, $precedingChar);
110+
$precedingChar = $precedingChar[0];
111+
} else {
112+
$precedingChar = substr($precedingText, -1);
113+
}
104114
$isSeparator = '' !== $precedingChar && false !== strpos(static::SEPARATORS, $precedingChar);
105115

106116
if (is_numeric($varName)) {
@@ -110,8 +120,8 @@ private static function compilePattern(Route $route, $pattern, $isHost)
110120
throw new \LogicException(sprintf('Route pattern "%s" cannot reference variable name "%s" more than once.', $pattern, $varName));
111121
}
112122

113-
if ($isSeparator && strlen($precedingText) > 1) {
114-
$tokens[] = array('text', substr($precedingText, 0, -1));
123+
if ($isSeparator && $precedingText !== $precedingChar) {
124+
$tokens[] = array('text', substr($precedingText, 0, -strlen($precedingChar)));
115125
} elseif (!$isSeparator && strlen($precedingText) > 0) {
116126
$tokens[] = array('text', $precedingText);
117127
}
@@ -126,7 +136,7 @@ private static function compilePattern(Route $route, $pattern, $isHost)
126136
// If {page} would also match the separating dot, {_format} would never match as {page} will eagerly consume everything.
127137
// Also even if {_format} was not optional the requirement prevents that {page} matches something that was originally
128138
// part of {_format} when generating the URL, e.g. _format = 'mobile.html'.
129-
$nextSeparator = self::findNextSeparator($followingPattern);
139+
$nextSeparator = self::findNextSeparator($followingPattern, $useUnicode);
130140
$regexp = sprintf(
131141
'[^%s%s]+',
132142
preg_quote($defaultSeparator, self::REGEX_DELIMITER),
@@ -140,6 +150,19 @@ private static function compilePattern(Route $route, $pattern, $isHost)
140150
// directly adjacent, e.g. '/{x}{y}'.
141151
$regexp .= '+';
142152
}
153+
} else {
154+
if (0 === strpos($regexp, '*')) {
155+
$needsUnicode = true;
156+
$regexp = substr($regexp, 1);
157+
}
158+
if (!preg_match('//u', $regexp)) {
159+
$useUnicode = false;
160+
} elseif (!$needsUnicode && preg_match('/[\x80-\xFF]|(?<!\\\\)\\\\(?:\\\\\\\\)*+(?-i:X|[pP][\{CLMNPSZ]|x\{)/', $regexp)) {
161+
$needsUnicode = true;
162+
}
163+
if (!$useUnicode && $needsUnicode) {
164+
throw new \LogicException(sprintf('Cannot mix unicode requirement with non-UTF-8 charset for variable "%s" in pattern "%s".', $varName, $pattern));
165+
}
143166
}
144167

145168
$tokens[] = array('variable', $isSeparator ? $precedingChar : '', $regexp, $varName);
@@ -168,10 +191,21 @@ private static function compilePattern(Route $route, $pattern, $isHost)
168191
for ($i = 0, $nbToken = count($tokens); $i < $nbToken; ++$i) {
169192
$regexp .= self::computeRegexp($tokens, $i, $firstOptional);
170193
}
194+
$regexp = self::REGEX_DELIMITER.'^'.$regexp.'$'.self::REGEX_DELIMITER.'s'.($isHost ? 'i' : '');
195+
196+
// enable Unicode matching if really required
197+
if ($needsUnicode) {
198+
$regexp .= 'u';
199+
for ($i = count($tokens) - 1; $i >= 0; --$i) {
200+
if ('variable' === $tokens[$i][0]) {
201+
$tokens[$i][] = true;
202+
}
203+
}
204+
}
171205

172206
return array(
173207
'staticPrefix' => 'text' === $tokens[0][0] ? $tokens[0][1] : '',
174-
'regex' => self::REGEX_DELIMITER.'^'.$regexp.'$'.self::REGEX_DELIMITER.'s'.($isHost ? 'i' : ''),
208+
'regex' => $regexp,
175209
'tokens' => array_reverse($tokens),
176210
'variables' => $variables,
177211
);
@@ -180,20 +214,26 @@ private static function compilePattern(Route $route, $pattern, $isHost)
180214
/**
181215
* Returns the next static character in the Route pattern that will serve as a separator.
182216
*
183-
* @param string $pattern The route pattern
217+
* @param string $pattern The route pattern
218+
* @param bool $useUnicode Whether the pattern is treated as UTF-8, so that its first character is read as a whole (possibly multi-byte) character
184219
*
185220
* @return string The next static character that functions as separator (or empty string when none available)
186221
*/
187-
private static function findNextSeparator($pattern)
222+
private static function findNextSeparator($pattern, $useUnicode)
188223
{
189224
if ('' == $pattern) {
190225
// return empty string if pattern is empty or false (false which can be returned by substr)
191226
return '';
192227
}
193228
// first remove all placeholders from the pattern so we can find the next real static character
194-
$pattern = preg_replace('#\{\w+\}#', '', $pattern);
229+
if ('' === $pattern = preg_replace('#\{\w+\}#', '', $pattern)) {
230+
return '';
231+
}
232+
if ($useUnicode) {
233+
preg_match('/^./u', $pattern, $pattern);
234+
}
195235

196-
return isset($pattern[0]) && false !== strpos(static::SEPARATORS, $pattern[0]) ? $pattern[0] : '';
236+
return false !== strpos(static::SEPARATORS, $pattern[0]) ? $pattern[0] : '';
197237
}
198238

199239
/**

src/Symfony/Component/Routing/Tests/Generator/UrlGeneratorTest.php

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,15 @@ public function testGenerateForRouteWithInvalidMandatoryParameter()
233233
$this->getGenerator($routes)->generate('test', array('foo' => 'bar'), UrlGeneratorInterface::ABSOLUTE_URL);
234234
}
235235

236+
/**
237+
* @expectedException \Symfony\Component\Routing\Exception\InvalidParameterException
238+
*/
239+
public function testGenerateForRouteWithInvalidUnicodeParameter()
240+
{
241+
$routes = $this->getRoutes('test', new Route('/testing/{foo}', array(), array('foo' => '\pL+')));
242+
$this->getGenerator($routes)->generate('test', array('foo' => 'abc123'), UrlGeneratorInterface::ABSOLUTE_URL);
243+
}
244+
236245
/**
237246
* @expectedException \Symfony\Component\Routing\Exception\InvalidParameterException
238247
*/

src/Symfony/Component/Routing/Tests/RouteCompilerTest.php

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
namespace Symfony\Component\Routing\Tests;
1313

1414
use Symfony\Component\Routing\Route;
15+
use Symfony\Component\Routing\RouteCompiler;
1516

1617
class RouteCompilerTest extends \PHPUnit_Framework_TestCase
1718
{
@@ -162,6 +163,56 @@ public function provideCompileData()
162163
array('text', '/foo'),
163164
),
164165
),
166+
167+
array(
168+
'Static non unicode route',
169+
array("/fo\xE9"),
170+
"/fo\xE9", "#^/fo\xE9$#s", array(), array(
171+
array('text', "/fo\xE9"),
172+
),
173+
),
174+
175+
array(
176+
'Static unicode route',
177+
array('/foé'),
178+
'/foé', '#^/foé$#su', array(), array(
179+
array('text', '/foé'),
180+
),
181+
),
182+
183+
array(
184+
'Route with an implicit unicode requirement',
185+
array('/{bar}', array('bar' => null), array('bar' => 'é')),
186+
'', '#^/(?P<bar>é)?$#su', array('bar'), array(
187+
array('variable', '/', 'é', 'bar', true),
188+
),
189+
),
190+
191+
array(
192+
'Route with an explicit unicode requirement',
193+
array('/{bar}', array('bar' => null), array('bar' => '*.')),
194+
'', '#^/(?P<bar>.)?$#su', array('bar'), array(
195+
array('variable', '/', '.', 'bar', true),
196+
),
197+
),
198+
199+
array(
200+
'Route with a unicode class requirement',
201+
array('/{bar}', array('bar' => null), array('bar' => '\pM')),
202+
'', '#^/(?P<bar>\pM)?$#su', array('bar'), array(
203+
array('variable', '/', '\pM', 'bar', true),
204+
),
205+
),
206+
207+
array(
208+
'Route with a unicode separator',
209+
array('/foo/{bar}§{_format}', array(), array(), array('compiler_class' => UnicodeRouteCompiler::class)),
210+
'/foo', '#^/foo/(?P<bar>[^/§]++)§(?P<_format>[^/]++)$#su', array('bar', '_format'), array(
211+
array('variable', '§', '[^/]++', '_format', true),
212+
array('variable', '/', '[^/§]++', 'bar', true),
213+
array('text', '/foo'),
214+
),
215+
),
165216
);
166217
}
167218

@@ -175,6 +226,16 @@ public function testRouteWithSameVariableTwice()
175226
$compiled = $route->compile();
176227
}
177228

229+
/**
230+
* @expectedException \LogicException
231+
*/
232+
public function testRouteCharsetMismatch()
233+
{
234+
$route = new Route("/\xE9/{bar}", array(), array('bar' => '*.'));
235+
236+
$compiled = $route->compile();
237+
}
238+
178239
/**
179240
* @expectedException \InvalidArgumentException
180241
*/
@@ -275,3 +336,8 @@ public function provideCompileWithHostData()
275336
);
276337
}
277338
}
339+
340+
class UnicodeRouteCompiler extends RouteCompiler
341+
{
342+
const SEPARATORS = '';
343+
}

0 commit comments

Comments
 (0)
0