@@ -1395,7 +1395,7 @@ def _make_int_array():
13951395
13961396
13971397class TfidfTransformer (TransformerMixin , BaseEstimator ):
1398- """Transform a count matrix to a normalized tf or tf-idf representation
1398+ """Transform a count matrix to a normalized tf or tf-idf representation.
13991399
14001400 Tf means term-frequency while tf-idf means term-frequency times inverse
14011401 document-frequency. This is a common term weighting scheme in information
@@ -1445,7 +1445,7 @@ class TfidfTransformer(TransformerMixin, BaseEstimator):
14451445 similarity between two vectors is their dot product when l2 norm has
14461446 been applied.
14471447 * 'l1': Sum of absolute values of vector elements is 1.
1448- See :func:`preprocessing.normalize`
1448+ See :func:`preprocessing.normalize`.
14491449
14501450 use_idf : bool, default=True
14511451 Enable inverse-document-frequency reweighting.
@@ -1471,6 +1471,26 @@ class TfidfTransformer(TransformerMixin, BaseEstimator):
14711471
14721472 .. versionadded:: 1.0
14731473
1474+ See Also
1475+ --------
1476+ CountVectorizer : Transforms text into a sparse matrix of n-gram counts.
1477+
8000
1478+ TfidfVectorizer : Convert a collection of raw documents to a matrix of
1479+ TF-IDF features.
1480+
1481+ HashingVectorizer : Convert a collection of text documents to a matrix
1482+ of token occurrences.
1483+
1484+ References
1485+ ----------
1486+
1487+ .. [Yates2011] R. Baeza-Yates and B. Ribeiro-Neto (2011). Modern
1488+ Information Retrieval. Addison Wesley, pp. 68-74.
1489+
1490+ .. [MRS2008] C.D. Manning, P. Raghavan and H. Schütze (2008).
1491+ Introduction to Information Retrieval. Cambridge University
1492+ Press, pp. 118-120.
1493+
14741494 Examples
14751495 --------
14761496 >>> from sklearn.feature_extraction.text import TfidfTransformer
@@ -1495,16 +1515,6 @@ class TfidfTransformer(TransformerMixin, BaseEstimator):
14951515 1. , 1.91629073, 1.91629073])
14961516 >>> pipe.transform(corpus).shape
14971517 (4, 8)
1498-
1499- References
1500- ----------
1501-
1502- .. [Yates2011] R. Baeza-Yates and B. Ribeiro-Neto (2011). Modern
1503- Information Retrieval. Addison Wesley, pp. 68-74.
1504-
1505- .. [MRS2008] C.D. Manning, P. Raghavan and H. Schütze (2008).
1506- Introduction to Information Retrieval. Cambridge University
1507- Press, pp. 118-120.
15081518 """
15091519
15101520 def __init__ (self , * , norm = "l2" , use_idf = True , smooth_idf = True , sublinear_tf = False ):
@@ -1520,6 +1530,14 @@ def fit(self, X, y=None):
15201530 ----------
15211531 X : sparse matrix of shape (n_samples, n_features)
15221532 A matrix of term/token counts.
1533+
1534+ y : None
1535+ This parameter is not needed to compute tf-idf.
1536+
1537+ Returns
1538+ -------
1539+ self : object
1540+ Fitted transformer.
15231541 """
15241542 X = self ._validate_data (X , accept_sparse = ("csr" , "csc" ))
15251543 if not sp .issparse (X ):
@@ -1549,12 +1567,12 @@ def fit(self, X, y=None):
15491567 return self
15501568
15511569 def transform (self , X , copy = True ):
1552- """Transform a count matrix to a tf or tf-idf representation
1570+ """Transform a count matrix to a tf or tf-idf representation.
15531571
15541572 Parameters
15551573 ----------
15561574 X : sparse matrix of shape (n_samples, n_features)
1557- a matrix of term/token counts
1575+ A matrix of term/token counts.
15581576
15591577 copy : bool, default=True
15601578 Whether to copy X and operate on the copy or perform in-place
@@ -1563,6 +1581,7 @@ def transform(self, X, copy=True):
15631581 Returns
15641582 -------
15651583 vectors : sparse matrix of shape (n_samples, n_features)
1584+ Tf-idf-weighted document-term matrix.
15661585 """
15671586 X = self ._validate_data (
15681587 X , accept_sparse = "csr" , dtype = FLOAT_DTYPES , copy = copy , reset = False
@@ -1590,6 +1609,14 @@ def transform(self, X, copy=True):
15901609
15911610 @property
15921611 def idf_ (self ):
1612+ """Return the inverse document frequency (IDF) vector.
1613+
1614+ Returns
1615+ -------
1616+ idf_ : ndarray of shape (n_features,)
1617+ The inverse document frequency (IDF) vector; only defined
1618+ if `use_idf` is True.
1619+ """
15931620 # if _idf_diag is not set, this will raise an attribute error,
15941621 # which means hasattr(self, "idf_") is False
15951622 return np .ravel (self ._idf_diag .sum (axis = 0 ))
0 commit comments