|
9 | 9 | We start by training a label propagation model with only 10 labeled points,
|
10 | 10 | then we select the top five most uncertain points to label. Next, we train
|
11 | 11 | with 15 labeled points (original 10 + 5 new ones). We repeat this process
|
12 |
| -four times to have a model trained with 30 labeled examples. |
| 12 | +four times to have a model trained with 30 labeled examples. Note you can |
| 13 | +increase this to label more than 30 by changing `max_iterations`. Labeling |
| 14 | +more than 30 can be useful to get a sense for the speed of convergence of |
| 15 | +this active learning technique. |
13 | 16 |
|
14 | 17 | A plot will appear showing the top 5 most uncertain digits for each iteration
|
15 | 18 | of training. These may or may not contain mistakes, but we will train the next
|
|
39 | 42 |
|
40 | 43 | n_total_samples = len(y)
|
41 | 44 | n_labeled_points = 10
|
| 45 | +max_iterations = 5 |
42 | 46 |
|
43 | 47 | unlabeled_indices = np.arange(n_total_samples)[n_labeled_points:]
|
44 | 48 | f = plt.figure()
|
45 | 49 |
|
46 |
| -for i in range(5): |
| 50 | +for i in range(max_iterations): |
| 51 | + if len(unlabeled_indices) == 0: |
| 52 | + print("No unlabeled items left to label.") |
| 53 | + break |
47 | 54 | y_train = np.copy(y)
|
48 | 55 | y_train[unlabeled_indices] = -1
|
49 | 56 |
|
|
56 | 63 | cm = confusion_matrix(true_labels, predicted_labels,
|
57 | 64 | labels=lp_model.classes_)
|
58 | 65 |
|
59 |
| - print('Iteration %i %s' % (i, 70 * '_')) |
| 66 | + print("Iteration %i %s" % (i, 70 * "_")) |
60 | 67 | print("Label Spreading model: %d labeled & %d unlabeled (%d total)"
|
61 | 68 | % (n_labeled_points, n_total_samples - n_labeled_points, n_total_samples))
|
62 | 69 |
|
|
69 | 76 | pred_entropies = stats.distributions.entropy(
|
70 | 77 | lp_model.label_distributions_.T)
|
71 | 78 |
|
72 |
| - # select five digit examples that the classifier is most uncertain about |
73 |
| - uncertainty_index = uncertainty_index = np.argsort(pred_entropies)[-5:] |
| 79 | + # select up to 5 digit examples that the classifier is most uncertain about |
| 80 | + uncertainty_index = np.argsort(pred_entropies)[::-1] |
| 81 | + uncertainty_index = uncertainty_index[ |
| 82 | + np.in1d(uncertainty_index, unlabeled_indices)][:5] |
74 | 83 |
|
75 | 84 | # keep track of indices that we get labels for
|
76 | 85 | delete_indices = np.array([])
|
77 | 86 |
|
78 |
| - f.text(.05, (1 - (i + 1) * .183), |
79 |
| - "model %d\n\nfit with\n%d labels" % ((i + 1), i * 5 + 10), size=10) |
| 87 | + # for more than 5 iterations, visualize the gain only on the first 5 |
| 88 | + if i < 5: |
| 89 | + f.text(.05, (1 - (i + 1) * .183), |
| 90 | + "model %d\n\nfit with\n%d labels" % |
| 91 | + ((i + 1), i * 5 + 10), size=10) |
80 | 92 | for index, image_index in enumerate(uncertainty_index):
|
81 | 93 | image = images[image_index]
|
82 | 94 |
|
83 |
| - sub = f.add_subplot(5, 5, index + 1 + (5 * i)) |
84 |
| - sub.imshow(image, cmap=plt.cm.gray_r) |
85 |
| - sub.set_title('predict: %i\ntrue: %i' % ( |
86 |
| - lp_model.transduction_[image_index], y[image_index]), size=10) |
87 |
| - sub.axis('off') |
| 95 | + # for more than 5 iterations, visualize the gain only on the first 5 |
| 96 | + if i < 5: |
| 97 | + sub = f.add_subplot(5, 5, index + 1 + (5 * i)) |
| 98 | + sub.imshow(image, cmap=plt.cm.gray_r) |
| 99 | + sub.set_title("predict: %i\ntrue: %i" % ( |
| 100 | + lp_model.transduction_[image_index], y[image_index]), size=10) |
| 101 | + sub.axis('off') |
88 | 102 |
|
89 | 103 | # labeling 5 points, remote from labeled set
|
90 | 104 | delete_index, = np.where(unlabeled_indices == image_index)
|
91 | 105 | delete_indices = np.concatenate((delete_indices, delete_index))
|
92 | 106 |
|
93 | 107 | unlabeled_indices = np.delete(unlabeled_indices, delete_indices)
|
94 |
| - n_labeled_points += 5 |
| 108 | + n_labeled_points += len(uncertainty_index) |
95 | 109 |
|
96 | 110 | f.suptitle("Active learning with Label Propagation.\nRows show 5 most "
|
97 | 111 | "uncertain labels to learn with the next model.")
|
|
0 commit comments