From 06920a79c483c427fec7d85f08bf842bf313bf79 Mon Sep 17 00:00:00 2001
From: Michael Babker
Date: Wed, 11 Apr 2018 12:33:35 -0500
Subject: [PATCH] Support pluralization in the inflector

Add Inflector::pluralize() as the counterpart of Inflector::singularize().
It walks a new $singularMap table whose entries hold a reversed singular
suffix, its length, whether it may follow a vowel / a consonant, and the
plural suffix (or a list of candidate plural suffixes).

Words that never change form (data, deer, feedback, fish, moose, series,
sheep) are listed in a new $uninflected array that both singularize() and
pluralize() check before consulting their suffix tables; the dedicated
'series' entry of the plural map is removed accordingly.

pluralize() bounds its suffix comparison by the word length, so an empty
string falls through to the default trailing `s` without reading an
undefined string offset.
---
 src/Symfony/Component/Inflector/Inflector.php | 271 +++++++++++++++++-
 .../Inflector/Tests/InflectorTest.php         | 139 +++++++++
 2 files changed, 407 insertions(+), 3 deletions(-)

diff --git a/src/Symfony/Component/Inflector/Inflector.php b/src/Symfony/Component/Inflector/Inflector.php
index cf685159a7f41..48538751d19d3 100644
--- a/src/Symfony/Component/Inflector/Inflector.php
+++ b/src/Symfony/Component/Inflector/Inflector.php
@@ -82,9 +82,6 @@ final class Inflector
         // news (news)
         array('swen', 4, true, true, 'news'),
 
-        // series (series)
-        array('seires', 6, true, true, 'series'),
-
         // babies (baby)
         array('sei', 3, false, true, 'y'),
 
@@ -139,6 +136,179 @@ final class Inflector
         array('elpoep', 6, true, true, 'person'),
     );
 
+    /**
+     * Map English singular to plural suffixes.
+     *
+     * @var array
+     *
+     * @see http://english-zone.com/spelling/plurals.html
+     */
+    private static $singularMap = array(
+        // First entry: singular suffix, reversed
+        // Second entry: length of singular suffix
+        // Third entry: Whether the suffix may succeed a vocal
+        // Fourth entry: Whether the suffix may succeed a consonant
+        // Fifth entry: plural suffix, normal
+
+        // criteria (criterion)
+        array('noiretirc', 9, false, false, 'criteria'),
+
+        // nebulae (nebula)
+        array('aluben', 6, false, false, 'nebulae'),
+
+        // children (child)
+        array('dlihc', 5, true, true, 'children'),
+
+        // prices (price)
+        array('eci', 3, false, true, 'ices'),
+
+        // services (service)
+        array('ecivres', 7, true, true, 'services'),
+
+        // lives (life), wives (wife)
+        array('efi', 3, false, true, 'ives'),
+
+        // selfies (selfie)
+        array('eifles', 6, true, true, 'selfies'),
+
+        // movies (movie)
+        array('eivom', 5, true, true, 'movies'),
+
+        // lice (louse)
+        array('esuol', 5, false, true, 'lice'),
+
+        // mice (mouse)
+        array('esuom', 5, false, true, 'mice'),
+
+        // geese (goose)
+        array('esoo', 4, false, true, 'eese'),
+
+        // houses (house), bases (base)
+        array('es', 2, true, true, 'ses'),
+
+        // geese (goose)
+        array('esoog', 5, true, true, 'geese'),
+
+        // caves (cave)
+        array('ev', 2, true, true, 'ves'),
+
+        // drives (drive)
+        array('evird', 5, false, true, 'drives'),
+
+        // objectives (objective), alternatives (alternative)
+        array('evit', 4, true, true, 'tives'),
+
+        // moves (move)
+        array('evom', 4, true, true, 'moves'),
+
+        // staves (staff)
+        array('ffats', 5, true, true, 'staves'),
+
+        // cliffs (cliff), sheriffs (sheriff)
+        array('ff', 2, true, true, 'ffs'),
+
+        // hooves (hoof), dwarves (dwarf), elves (elf), leaves (leaf)
+        array('f', 1, true, true, array('fs', 'ves')),
+
+        // arches (arch)
+        array('hc', 2, true, true, 'ches'),
+
+        // bushes (bush)
+        array('hs', 2, true, true, 'shes'),
+
+        // teeth (tooth)
+        array('htoot', 5, true, true, 'teeth'),
+
+        // bacteria (bacterium), strata (stratum)
+        array('mu', 2, true, true, 'a'),
+
+        // echoes (echo)
+        array('ohce', 4, true, true, 'echoes'),
+
+        // men (man), women (woman)
+        array('nam', 3, true, true, 'men'),
+
+        // people (person)
+        array('nosrep', 6, true, true, array('persons', 'people')),
+
+        // ions (ion)
+        array('noi', 3, true, true, 'ions'),
+
+        // phenomena (phenomenon)
+        array('no', 2, true, true, 'a'),
+
+        // atlases (atlas)
+        array('salta', 5, true, true, 'atlases'),
+
+        // irises (iris)
+        array('siri', 4, true, true, 'irises'),
+
+        // analyses (analysis), ellipses (ellipsis), neuroses (neurosis)
+        // theses (thesis), emphases (emphasis), oases (oasis),
+        // crises (crisis)
+        array('sis', 3, true, true, 'ses'),
+
+        // accesses (access), addresses (address), kisses (kiss)
+        array('ss', 2, true, false, 'sses'),
+
+        // syllabi (syllabus)
+        array('suballys', 8, true, true, 'syllabi'),
+
+        // buses (bus)
+        array('sub', 3, true, true, 'buses'),
+
+        // fungi (fungus), alumni (alumnus), syllabi (syllabus), radii (radius)
+        array('su', 2, true, true, 'i'),
+
+        // news (news)
+        array('swen', 4, true, true, 'news'),
+
+        // feet (foot)
+        array('toof', 4, true, true, 'feet'),
+
+        // chateaux (chateau), bureaus (bureau)
+        array('uae', 3, false, true, array('eaus', 'eaux')),
+
+        // oxen (ox)
+        array('xo', 2, false, false, 'oxen'),
+
+        // hoaxes (hoax)
+        array('xaoh', 4, true, false, 'hoaxes'),
+
+        // indices (index)
+        array('xedni', 5, false, true, array('indicies', 'indexes')),
+
+        // indexes (index), matrixes (matrix)
+        array('x', 1, true, false, array('cies', 'xes')),
+
+        // appendices (appendix)
+        array('xi', 2, false, true, 'ices'),
+
+        // babies (baby)
+        array('y', 1, false, true, 'ies'),
+
+        // quizzes (quiz)
+        array('ziuq', 4, true, false, 'quizzes'),
+
+        // waltzes (waltz)
+        array('z', 1, true, false, 'zes'),
+    );
+
+    /**
+     * A list of words which should not be inflected.
+     *
+     * @var array
+     */
+    private static $uninflected = array(
+        'data',
+        'deer',
+        'feedback',
+        'fish',
+        'moose',
+        'series',
+        'sheep',
+    );
+
     /**
      * This class should not be instantiated.
      */
@@ -165,6 +335,11 @@ public static function singularize(string $plural)
         $lowerPluralRev = strtolower($pluralRev);
         $pluralLength = strlen($lowerPluralRev);
 
+        // Check if the word is one which is not inflected, return early if so
+        if (in_array(strtolower($plural), self::$uninflected, true)) {
+            return $plural;
+        }
+
         // The outer loop iterates over the entries of the plural table
         // The inner loop $j iterates over the characters of the plural suffix
         // in the plural table to compare them with the characters of the actual
@@ -229,4 +404,94 @@ public static function singularize(string $plural)
         // Assume that plural and singular is identical
         return $plural;
     }
+
+    /**
+     * Returns the plural form of a word.
+     *
+     * If the method can't determine the form with certainty, an array of the
+     * possible plurals is returned.
+     *
+     * @param string $singular A word in singular form
+     *
+     * @return string|array The plural form or an array of possible plural
+     *                      forms
+     *
+     * @internal
+     */
+    public static function pluralize(string $singular)
+    {
+        $singularRev = strrev($singular);
+        $lowerSingularRev = strtolower($singularRev);
+        $singularLength = strlen($lowerSingularRev);
+
+        // Check if the word is one which is not inflected, return early if so
+        if (in_array(strtolower($singular), self::$uninflected, true)) {
+            return $singular;
+        }
+
+        // The outer loop iterates over the entries of the singular table
+        // The inner loop $j iterates over the characters of the singular suffix
+        // in the singular table to compare them with the characters of the actual
+        // given singular suffix
+        foreach (self::$singularMap as $map) {
+            $suffix = $map[0];
+            $suffixLength = $map[1];
+            $j = 0;
+
+            // Compare characters in the singular table and of the suffix of the
+            // given singular one by one; the bound keeps '' from reading offset 0
+
+            while ($j < $singularLength && $suffix[$j] === $lowerSingularRev[$j]) {
+                // Let $j point to the next character
+                ++$j;
+
+                // Successfully compared the last character
+                // Add an entry with the plural suffix to the plural array
+                if ($j === $suffixLength) {
+                    // Is there any character preceding the suffix in the singular string?
+                    if ($j < $singularLength) {
+                        $nextIsVocal = false !== strpos('aeiou', $lowerSingularRev[$j]);
+
+                        if (!$map[2] && $nextIsVocal) {
+                            // suffix may not succeed a vocal but next char is one
+                            break;
+                        }
+
+                        if (!$map[3] && !$nextIsVocal) {
+                            // suffix may not succeed a consonant but next char is one
+                            break;
+                        }
+                    }
+
+                    $newBase = substr($singular, 0, $singularLength - $suffixLength);
+                    $newSuffix = $map[4];
+
+                    // Check whether the first character in the singular suffix
+                    // is uppercased. If yes, uppercase the first character in
+                    // the plural suffix too
+                    $firstUpper = ctype_upper($singularRev[$j - 1]);
+
+                    if (is_array($newSuffix)) {
+                        $plurals = array();
+
+                        foreach ($newSuffix as $newSuffixEntry) {
+                            $plurals[] = $newBase.($firstUpper ? ucfirst($newSuffixEntry) : $newSuffixEntry);
+                        }
+
+                        return $plurals;
+                    }
+
+                    return $newBase.($firstUpper ? ucfirst($newSuffix) : $newSuffix);
+                }
+
+                // Suffix is longer than word
+                if ($j === $singularLength) {
+                    break;
+                }
+            }
+        }
+
+        // Assume that plural is singular with a trailing `s`
+        return $singular.'s';
+    }
 }
diff --git a/src/Symfony/Component/Inflector/Tests/InflectorTest.php b/src/Symfony/Component/Inflector/Tests/InflectorTest.php
index be7836736f8ba..b0a68944d944a 100644
--- a/src/Symfony/Component/Inflector/Tests/InflectorTest.php
+++ b/src/Symfony/Component/Inflector/Tests/InflectorTest.php
@@ -155,6 +155,130 @@ public function singularizeProvider()
         );
     }
 
+    public function pluralizeProvider()
+    {
+        // see http://english-zone.com/spelling/plurals.html
+        // see http://www.scribd.com/doc/3271143/List-of-100-Irregular-Plural-Nouns-in-English
+        return array(
+            array('access', 'accesses'),
+            array('address', 'addresses'),
+            array('agenda', 'agendas'),
+            array('alumnus', 'alumni'),
+            array('analysis', 'analyses'),
+            array('antenna', 'antennas'), //antennae
+            array('appendix', array('appendicies', 'appendixes')),
+            array('arch', 'arches'),
+            array('atlas', 'atlases'),
+            array('axe', 'axes'),
+            array('baby', 'babies'),
+            array('bacterium', 'bacteria'),
+            array('base', 'bases'),
+            array('batch', 'batches'),
+            array('beau', array('beaus', 'beaux')),
+            array('bee', 'bees'),
+            array('box', array('bocies', 'boxes')),
+            array('boy', 'boys'),
+            array('bureau', array('bureaus', 'bureaux')),
+            array('bus', 'buses'),
+            array('bush', 'bushes'),
+            array('calf', array('calfs', 'calves')),
+            array('car', 'cars'),
+            array('cassette', 'cassettes'),
+            array('cave', 'caves'),
+            array('chateau', array('chateaus', 'chateaux')),
+            array('cheese', 'cheeses'),
+            array('child', 'children'),
+            array('circus', 'circuses'),
+            array('cliff', 'cliffs'),
+            array('committee', 'committees'),
+            array('crisis', 'crises'),
+            array('criterion', 'criteria'),
+            array('cup', 'cups'),
+            array('data', 'data'),
+            array('day', 'days'),
+            array('disco', 'discos'),
+            array('device', 'devices'),
+            array('drive', 'drives'),
+            array('driver', 'drivers'),
+            array('dwarf', array('dwarfs', 'dwarves')),
+            array('echo', 'echoes'),
+            array('elf', array('elfs', 'elves')),
+            array('emphasis', 'emphases'),
+            array('fax', array('facies', 'faxes')),
+            array('feedback', 'feedback'),
+            array('focus', 'foci'),
+            array('foot', 'feet'),
+            array('formula', 'formulas'), //formulae
+            array('fungus', 'fungi'),
+            array('garage', 'garages'),
+            array('goose', 'geese'),
+            array('half', array('halfs', 'halves')),
+            array('hat', 'hats'),
+            array('hero', 'heroes'),
+            array('hippopotamus', 'hippopotami'), //hippopotamuses
+            array('hoax', 'hoaxes'),
+            array('hoof', array('hoofs', 'hooves')),
+            array('house', 'houses'),
+            array('index', array('indicies', 'indexes')),
+            array('ion', 'ions'),
+            array('iris', 'irises'),
+            array('kiss', 'kisses'),
+            array('knife', 'knives'),
+            array('lamp', 'lamps'),
+            array('leaf', array('leafs', 'leaves')),
+            array('life', 'lives'),
+            array('louse', 'lice'),
+            array('man', 'men'),
+            array('matrix', array('matricies', 'matrixes')),
+            array('mouse', 'mice'),
+            array('move', 'moves'),
+            array('movie', 'movies'),
+            array('nebula', 'nebulae'),
+            array('neurosis', 'neuroses'),
+            array('news', 'news'),
+            array('oasis', 'oases'),
+            array('objective', 'objectives'),
+            array('ox', 'oxen'),
+            array('party', 'parties'),
+            array('person', array('persons', 'people')),
+            array('phenomenon', 'phenomena'),
+            array('photo', 'photos'),
+            array('piano', 'pianos'),
+            array('plateau', array('plateaus', 'plateaux')),
+            array('poppy', 'poppies'),
+            array('price', 'prices'),
+            array('quiz', 'quizzes'),
+            array('radius', 'radii'),
+            array('roof', array('roofs', 'rooves')),
+            array('rose', 'roses'),
+            array('sandwich', 'sandwiches'),
+            array('scarf', array('scarfs', 'scarves')),
+            array('schema', 'schemas'), //schemata
+            array('selfie', 'selfies'),
+            array('series', 'series'),
+            array('service', 'services'),
+            array('sheriff', 'sheriffs'),
+            array('shoe', 'shoes'),
+            array('spy', 'spies'),
+            array('staff', 'staves'),
+            array('story', 'stories'),
+            array('stratum', 'strata'),
+            array('suitcase', 'suitcases'),
+            array('syllabus', 'syllabi'),
+            array('tag', 'tags'),
+            array('thief', array('thiefs', 'thieves')),
+            array('tooth', 'teeth'),
+            array('tree', 'trees'),
+            array('waltz', 'waltzes'),
+            array('wife', 'wives'),
+
+            // test casing: if the first letter was uppercase, it should remain so
+            array('Man', 'Men'),
+            array('GrandChild', 'GrandChildren'),
+            array('SubTree', 'SubTrees'),
+        );
+    }
+
     /**
      * @dataProvider singularizeProvider
      */
@@ -169,4 +293,19 @@ public function testSingularize($plural, $singular)
 
         $this->assertEquals($singular, $single);
     }
+
+    /**
+     * @dataProvider pluralizeProvider
+     */
+    public function testPluralize($singular, $plural)
+    {
+        $actual = Inflector::pluralize($singular);
+        if (is_string($plural) && is_array($actual)) {
+            $this->fail("--- Expected\n`string`: ".$plural."\n+++ Actual\n`array`: ".implode(', ', $actual));
+        } elseif (is_array($plural) && is_string($actual)) {
+            $this->fail("--- Expected\n`array`: ".implode(', ', $plural)."\n+++ Actual\n`string`: ".$actual);
+        }
+
+        $this->assertEquals($plural, $actual);
+    }
 }