DOC Fixing a bug where entropy included labeled items (#8150) · scikit-learn/scikit-learn@1b0ec1b · GitHub
[go: up one dir, main page]

Skip to content

Commit 1b0ec1b

Browse files
mdezube authored and jnothman committed
DOC Fixing a bug where entropy included labeled items (#8150)
1 parent 9f6b849 commit 1b0ec1b

File tree

1 file changed

+27
-13
lines changed

1 file changed

+27
-13
lines changed

examples/semi_supervised/plot_label_propagation_digits_active_learning.py

+27-13
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@
99
We start by training a label propagation model with only 10 labeled points,
1010
then we select the top five most uncertain points to label. Next, we train
1111
with 15 labeled points (original 10 + 5 new ones). We repeat this process
12-
four times to have a model trained with 30 labeled examples.
12+
four times to have a model trained with 30 labeled examples. Note you can
13+
increase this to label more than 30 by changing `max_iterations`. Labeling
14+
more than 30 can be useful to get a sense for the speed of convergence of
15+
this active learning technique.
1316
1417
A plot will appear showing the top 5 most uncertain digits for each iteration
1518
of training. These may or may not contain mistakes, but we will train the next
@@ -39,11 +42,15 @@
3942

4043
n_total_samples = len(y)
4144
n_labeled_points = 10
45+
max_iterations = 5
4246

4347
unlabeled_indices = np.arange(n_total_samples)[n_labeled_points:]
4448
f = plt.figure()
4549

46-
for i in range(5):
50+
for i in range(max_iterations):
51+
if len(unlabeled_indices) == 0:
52+
print("No unlabeled items left to label.")
53+
break
4754
y_train = np.copy(y)
4855
y_train[unlabeled_indices] = -1
4956

@@ -56,7 +63,7 @@
5663
cm = confusion_matrix(true_labels, predicted_labels,
5764
labels=lp_model.classes_)
5865

59-
print('Iteration %i %s' % (i, 70 * '_'))
66+
print("Iteration %i %s" % (i, 70 * "_"))
6067
print("Label Spreading model: %d labeled & %d unlabeled (%d total)"
6168
% (n_labeled_points, n_total_samples - n_labeled_points, n_total_samples))
6269

@@ -69,29 +76,36 @@
6976
pred_entropies = stats.distributions.entropy(
7077
lp_model.label_distributions_.T)
7178

72-
# select five digit examples that the classifier is most uncertain about
73-
uncertainty_index = uncertainty_index = np.argsort(pred_entropies)[-5:]
79+
# select up to 5 digit examples that the classifier is most uncertain about
80+
uncertainty_index = np.argsort(pred_entropies)[::-1]
81+
uncertainty_index = uncertainty_index[
82+
np.in1d(uncertainty_index, unlabeled_indices)][:5]
7483

7584
# keep track of indices that we get labels for
7685
delete_indices = np.array([])
7786

78-
f.text(.05, (1 - (i + 1) * .183),
79-
"model %d\n\nfit with\n%d labels" % ((i + 1), i * 5 + 10), size=10)
87+
# for more than 5 iterations, visualize the gain only on the first 5
88+
if i < 5:
89+
f.text(.05, (1 - (i + 1) * .183),
90+
"model %d\n\nfit with\n%d labels" %
91+
((i + 1), i * 5 + 10), size=10)
8092
for index, image_index in enumerate(uncertainty_index):
8193
image = images[image_index]
8294

83-
sub = f.add_subplot(5, 5, index + 1 + (5 * i))
84-
sub.imshow(image, cmap=plt.cm.gray_r)
85-
sub.set_title('predict: %i\ntrue: %i' % (
86-
lp_model.transduction_[image_index], y[image_index]), size=10)
87-
sub.axis('off')
95+
# for more than 5 iterations, visualize the gain only on the first 5
96+
if i < 5:
97+
sub = f.add_subplot(5, 5, index + 1 + (5 * i))
98+
sub.imshow(image, cmap=plt.cm.gray_r)
99+
sub.set_title("predict: %i\ntrue: %i" % (
100+
lp_model.transduction_[image_index], y[image_index]), size=10)
101+
sub.axis('off')
88102

89103
# labeling 5 points, remote from labeled set
90104
delete_index, = np.where(unlabeled_indices == image_index)
91105
delete_indices = np.concatenate((delete_indices, delete_index))
92106

93107
unlabeled_indices = np.delete(unlabeled_indices, delete_indices)
94-
n_labeled_points += 5
108+
n_labeled_points += len(uncertainty_index)
95109

96110
f.suptitle("Active learning with Label Propagation.\nRows show 5 most "
97111
"uncertain labels to learn with the next model.")

0 commit comments

Comments
 (0)
0