TST: LatentDirichletAllocation behavior on empty documents · scikit-learn/scikit-learn@5908bcc · GitHub
[go: up one dir, main page]

Skip to content

Commit 5908bcc

Browse files
committed
TST: LatentDirichletAllocation behavior on empty documents
Preparing for potential optimization of this estimator.
1 parent f4fa782 commit 5908bcc

File tree

1 file changed

+11
-1
lines changed

1 file changed

+11
-1
lines changed

sklearn/decomposition/tests/test_online_lda.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -286,10 +286,20 @@ def test_perplexity_input_format():
286286
def test_lda_score_perplexity():
287287
# Test the relationship between LDA score and perplexity
288288
n_topics, X = _build_sparse_mtx()
289-
lda = LatentDirichletAllocation(n_topics=n_topics, max_iter=10, random_state=0)
289+
lda = LatentDirichletAllocation(n_topics=n_topics, max_iter=10,
290+
random_state=0)
290291
distr = lda.fit_transform(X)
291292
perplexity_1 = lda.perplexity(X, distr, sub_sampling=False)
292293

293294
score = lda.score(X)
294295
perplexity_2 = np.exp(-1. * (score / np.sum(X.data)))
295296
assert_almost_equal(perplexity_1, perplexity_2)
297+
298+
299+
def test_lda_empty_docs():
300+
"""Test LDA on empty document (all-zero rows)."""
301+
Z = np.zeros((5, 4))
302+
for X in [Z, csr_matrix(Z)]:
303+
lda = LatentDirichletAllocation(max_iter=750).fit(X)
304+
assert_almost_equal(lda.components_.sum(axis=0),
305+
np.ones(lda.components_.shape[1]))

0 commit comments

Comments
 (0)
0