@@ -1342,6 +1342,31 @@ class TfidfTransformer(TransformerMixin, BaseEstimator):
1342
1342
The inverse document frequency (IDF) vector; only defined
1343
1343
if ``use_idf`` is True.
1344
1344
1345
+ Examples
1346
+ --------
1347
+ >>> from sklearn.feature_extraction.text import TfidfTransformer
1348
+ >>> from sklearn.feature_extraction.text import CountVectorizer
1349
+ >>> from sklearn.pipeline import Pipeline
1350
+ >>> import numpy as np
1351
+ >>> corpus = ['this is the first document',
1352
+ ... 'this document is the second document',
1353
+ ... 'and this is the third one',
1354
+ ... 'is this the first document']
1355
+ >>> vocabulary = ['this', 'document', 'first', 'is', 'second', 'the',
1356
+ ... 'and', 'one']
1357
+ >>> pipe = Pipeline([('count', CountVectorizer(vocabulary=vocabulary)),
1358
+ ... ('tfidf', TfidfTransformer())]).fit(corpus)
1359
+ >>> pipe['count'].transform(corpus).toarray()
1360
+ array([[1, 1, 1, 1, 0, 1, 0, 0],
1361
+ [1, 2, 0, 1, 1, 1, 0, 0],
1362
+ [1, 0, 0, 1, 0, 1, 1, 1],
1363
+ [1, 1, 1, 1, 0, 1, 0, 0]])
1364
+ >>> pipe['tfidf'].idf_
1365
+ array([1. , 1.22314355, 1.51082562, 1. , 1.91629073,
1366
+ 1. , 1.91629073, 1.91629073])
1367
+ >>> pipe.transform(corpus).shape
1368
+ (4, 8)
1369
+
1345
1370
References
1346
1371
----------
1347
1372
0 commit comments