@@ -228,6 +228,9 @@ def _measure(self, stem):
228
228
# the number of 'VC' occurrences in Porter's reduced form in the
229
229
# docstring above, which is in turn equivalent to `m`
230
230
return cv_sequence .count ('vc' )
231
+
232
+ def _has_positive_measure (self , stem ):
233
+ return self ._measure (stem ) > 0
231
234
232
235
def _contains_vowel (self , stem ):
233
236
"""_contains_vowel(stem) is TRUE <=> stem contains a vowel"""
@@ -497,89 +500,83 @@ def _step2(self, word):
497
500
(m>0) IVITI -> IVE sensitiviti -> sensitive
498
501
(m>0) BILITI -> BLE sensibiliti -> sensible
499
502
"""
500
- positive_measure = lambda stem : self ._measure (stem ) > 0
501
-
503
+
502
504
# --NEW--
503
505
# Instead of applying the ALLI -> AL rule after 'bli' per the
504
506
# published algorithm, we apply it first, and, if it
505
507
# succeeds, run the result through step2 again.
506
508
try :
507
- stem = self ._replace_suffix_if (word , 'alli' , 'al' , positive_measure )
509
+ stem = self ._replace_suffix_if (
510
+ word ,
511
+ 'alli' ,
512
+ 'al' ,
513
+ self ._has_positive_measure
514
+ )
508
515
return self ._step2 (stem )
509
516
except _CannotReplaceSuffix :
510
517
pass
511
518
512
519
return self ._apply_first_possible_rule (word , [
513
- ('ational' , 'ate' , positive_measure ),
514
- ('tional' , 'tion' , positive_measure ),
515
- ('enci' , 'ence' , positive_measure ),
516
- ('anci' , 'ance' , positive_measure ),
517
- ('izer' , 'ize' , positive_measure ),
520
+ ('ational' , 'ate' , self . _has_positive_measure ),
521
+ ('tional' , 'tion' , self . _has_positive_measure ),
522
+ ('enci' , 'ence' , self . _has_positive_measure ),
523
+ ('anci' , 'ance' , self . _has_positive_measure ),
524
+ ('izer' , 'ize' , self . _has_positive_measure ),
518
525
519
526
# --DEPARTURE--
520
527
# To match the published algorithm, replace "bli" with
521
528
# "abli" and "ble" with "able"
522
- ('bli' , 'ble' , positive_measure ),
529
+ ('bli' , 'ble' , self . _has_positive_measure ),
523
530
524
531
# -- NEW --
525
- ('fulli' , 'ful' , positive_measure ),
532
+ ('fulli' , 'ful' , self . _has_positive_measure ),
526
533
527
- ('entli' , 'ent' , positive_measure ),
528
- ('eli' , 'e' , positive_measure ),
529
- ('ousli' , 'ous' , positive_measure ),
530
- ('ization' , 'ize' , positive_measure ),
531
- ('ation' , 'ate' , positive_measure ),
532
- ('ator' , 'ate' , positive_measure ),
533
- ('alism' , 'al' , positive_measure ),
534
- ('iveness' , 'ive' , positive_measure ),
535
- ('fulness' , 'ful' , positive_measure ),
536
- ('ousness' , 'ous' , positive_measure ),
537
- ('aliti' , 'al' , positive_measure ),
538
- ('iviti' , 'ive' , positive_measure ),
539
- ('biliti' , 'ble' , positive_measure ),
534
+ ('entli' , 'ent' , self . _has_positive_measure ),
535
+ ('eli' , 'e' , self . _has_positive_measure ),
536
+ ('ousli' , 'ous' , self . _has_positive_measure ),
537
+ ('ization' , 'ize' , self . _has_positive_measure ),
538
+ ('ation' , 'ate' , self . _has_positive_measure ),
539
+ ('ator' , 'ate' , self . _has_positive_measure ),
540
+ ('alism' , 'al' , self . _has_positive_measure ),
541
+ ('iveness' , 'ive' , self . _has_positive_measure ),
542
+ ('fulness' , 'ful' , self . _has_positive_measure ),
543
+ ('ousness' , 'ous' , self . _has_positive_measure ),
544
+ ('aliti' , 'al' , self . _has_positive_measure ),
545
+ ('iviti' , 'ive' , self . _has_positive_measure ),
546
+ ('biliti' , 'ble' , self . _has_positive_measure ),
540
547
541
548
# --DEPARTURE--
542
549
# To match the published algorithm, delete the 'logi' rule below
543
550
# --NEW-- (Barry Wilkins)
544
551
# To match the published algorithm, replace lambda below
545
- # with just positive_measure
546
- ("logi" , "log" , lambda stem : positive_measure (word [:- 3 ])),
552
+ # with just self._has_positive_measure
553
+ ("logi" , "log" , lambda stem : self . _has_positive_measure (word [:- 3 ])),
547
554
])
548
555
549
556
def _step3 (self , word ):
550
- """step3() deals with -ic-, -full, -ness etc. similar strategy to step2."""
551
-
552
- ch = word [- 1 ]
553
-
554
- if ch == 'e' :
555
- if word .endswith ("icate" ):
556
- return word [:- 3 ] if self ._m (word , len (word )- 6 ) else word
557
- elif word .endswith ("ative" ):
558
- return word [:- 5 ] if self ._m (word , len (word )- 6 ) else word
559
- elif word .endswith ("alize" ):
560
- return word [:- 3 ] if self ._m (word , len (word )- 6 ) else word
561
- else :
562
- return word
563
- elif ch == 'i' :
564
- if word .endswith ("iciti" ):
565
- return word [:- 3 ] if self ._m (word , len (word )- 6 ) else word
566
- else :
567
- return word
568
- elif ch == 'l' :
569
- if word .endswith ("ical" ):
570
- return word [:- 2 ] if self ._m (word , len (word )- 5 ) else word
571
- elif word .endswith ("ful" ):
572
- return word [:- 3 ] if self ._m (word , len (word )- 4 ) else word
573
- else :
574
- return word
575
- elif ch == 's' :
576
- if word .endswith ("ness" ):
577
- return word [:- 4 ] if self ._m (word , len (word )- 5 ) else word
578
- else :
579
- return word
580
-
581
- else :
582
- return word
557
+ """Implements Step 3 from "An algorithm for suffix stripping"
558
+
559
+ From the paper:
560
+
561
+ Step 3
562
+
563
+ (m>0) ICATE -> IC triplicate -> triplic
564
+ (m>0) ATIVE -> formative -> form
565
+ (m>0) ALIZE -> AL formalize -> formal
566
+ (m>0) ICITI -> IC electriciti -> electric
567
+ (m>0) ICAL -> IC electrical -> electric
568
+ (m>0) FUL -> hopeful -> hope
569
+ (m>0) NESS -> goodness -> good
570
+ """
571
+ return self ._apply_first_possible_rule (word , [
572
+ ('icate' , 'ic' , self ._has_positive_measure ),
573
+ ('ative' , '' , self ._has_positive_measure ),
574
+ ('alize' , 'al' , self ._has_positive_measure ),
575
+ ('iciti' , 'ic' , self ._has_positive_measure ),
576
+ ('ical' , 'ic' , self ._has_positive_measure ),
577
+ ('ful' , '' , self ._has_positive_measure ),
578
+ ('ness' , '' , self ._has_positive_measure ),
579
+ ])
583
580
584
581
def _step4 (self , word ):
585
582
"""step4() takes off -ant, -ence etc., in context <c>vcvc<v>."""
0 commit comments