@@ -27,7 +27,9 @@ class _LineSearchError(RuntimeError):
    pass


-def _line_search_wolfe12(f, fprime, xk, pk, gfk, old_fval, old_old_fval, **kwargs):
+def _line_search_wolfe12(
+    f, fprime, xk, pk, gfk, old_fval, old_old_fval, verbose=0, **kwargs
+):
    """
    Same as line_search_wolfe1, but fall back to line_search_wolfe2 if
    suitable step length is not found, and raise an exception if a
@@ -39,24 +41,44 @@ def _line_search_wolfe12(f, fprime, xk, pk, gfk, old_fval, old_old_fval, **kwarg
        If no suitable step size is found.

    """
+    is_verbose = verbose >= 2
+    eps = 16 * np.finfo(np.asarray(old_fval).dtype).eps
+    if is_verbose:
+        print("  Line Search")
+        print(f"    eps=16 * finfo.eps={eps}")
+        print("    try line search wolfe1")
+
    ret = line_search_wolfe1(f, fprime, xk, pk, gfk, old_fval, old_old_fval, **kwargs)

+    if is_verbose:
+        _not_ = "not " if ret[0] is None else ""
+        print("    wolfe1 line search was " + _not_ + "successful")
+
    if ret[0] is None:
        # Have a look at the line_search method of our NewtonSolver class. We borrow
        # the logic from there
        # Deal with relative loss differences around machine precision.
        args = kwargs.get("args", tuple())
        fval = f(xk + pk, *args)
-        eps = 16 * np.finfo(np.asarray(old_fval).dtype).eps
        tiny_loss = np.abs(old_fval * eps)
        loss_improvement = fval - old_fval
        check = np.abs(loss_improvement) <= tiny_loss
+        if is_verbose:
+            print(
+                "    check loss |improvement| <= eps * |loss_old|:"
+                f" {np.abs(loss_improvement)} <= {tiny_loss} {check}"
+            )
        if check:
            # 2.1 Check sum of absolute gradients as alternative condition.
            sum_abs_grad_old = scipy.linalg.norm(gfk, ord=1)
            grad = fprime(xk + pk, *args)
            sum_abs_grad = scipy.linalg.norm(grad, ord=1)
            check = sum_abs_grad < sum_abs_grad_old
+            if is_verbose:
+                print(
+                    "    check sum(|gradient|) < sum(|gradient_old|): "
+                    f"{sum_abs_grad} < {sum_abs_grad_old} {check}"
+                )
            if check:
                ret = (
                    1.0,  # step size
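
Note: the eps-based check added above treats a failed wolfe1 search as potentially acceptable when the loss change is at the level of floating-point noise (it is then combined with the gradient-sum check that follows). A rough numeric illustration of that check, as standalone Python with made-up values not taken from the solver:

    import numpy as np

    old_fval = 1.2345678901234567          # previous loss
    fval = 1.2345678901234569              # loss after the trial step
    eps = 16 * np.finfo(np.float64).eps    # ~3.55e-15, same eps as in the diff
    tiny_loss = np.abs(old_fval * eps)
    # |improvement| <= eps * |loss_old|  ->  True: the change is just noise
    print(np.abs(fval - old_fval) <= tiny_loss)
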
@@ -72,17 +94,22 @@ def _line_search_wolfe12(f, fprime, xk, pk, gfk, old_fval, old_old_fval, **kwarg
        # TODO: It seems that the new check for the sum of absolute gradients above
        # catches all cases that, earlier, ended up here. In fact, our tests never
        # trigger this "if branch" here and we can consider to remove it.
+        if is_verbose:
+            print("    last resort: try line search wolfe2")
        ret = line_search_wolfe2(
            f, fprime, xk, pk, gfk, old_fval, old_old_fval, **kwargs
        )
+        if is_verbose:
+            _not_ = "not " if ret[0] is None else ""
+            print("    wolfe2 line search was " + _not_ + "successful")

    if ret[0] is None:
        raise _LineSearchError()

    return ret


-def _cg(fhess_p, fgrad, maxiter, tol):
+def _cg(fhess_p, fgrad, maxiter, tol, verbose=0):
    """
    Solve iteratively the linear system 'fhess_p . xsupi = fgrad'
    with a conjugate gradient descent.
@@ -107,30 +134,51 @@ def _cg(fhess_p, fgrad, maxiter, tol):
    xsupi : ndarray of shape (n_features,) or (n_features + 1,)
        Estimated solution.
    """
+    eps = 16 * np.finfo(np.float64).eps
    xsupi = np.zeros(len(fgrad), dtype=fgrad.dtype)
-    ri = np.copy(fgrad)
+    ri = np.copy(fgrad)  # residual = fgrad - fhess_p @ xsupi
    psupi = -ri
    i = 0
    dri0 = np.dot(ri, ri)
-    # We also track of |p_i|^2.
+    # We also keep track of |p_i|^2.
    psupi_norm2 = dri0
+    is_verbose = verbose >= 2

    while i <= maxiter:
        if np.sum(np.abs(ri)) <= tol:
+            if is_verbose:
+                print(
+                    f"  Inner CG solver iteration {i} stopped with\n"
+                    f"    sum(|residuals|) <= tol: {np.sum(np.abs(ri))} <= {tol}"
+                )
            break

        Ap = fhess_p(psupi)
        # check curvature
        curv = np.dot(psupi, Ap)
-        if 0 <= curv <= 16 * np.finfo(np.float64).eps * psupi_norm2:
+        if 0 <= curv <= eps * psupi_norm2:
            # See https://arxiv.org/abs/1803.02924, Algo 1 Capped Conjugate Gradient.
+            if is_verbose:
+                print(
+                    f"  Inner CG solver iteration {i} stopped with\n"
+                    f"    tiny_|p| = eps * ||p||^2, eps = {eps}, "
+                    f"squared L2 norm ||p||^2 = {psupi_norm2}\n"
+                    f"    curvature <= tiny_|p|: {curv} <= {eps * psupi_norm2}"
+                )
            break
        elif curv < 0:
            if i > 0:
+                if is_verbose:
+                    print(
+                        f"  Inner CG solver iteration {i} stopped with negative "
+                        f"curvature, curvature = {curv}"
+                    )
                break
            else:
                # fall back to steepest descent direction
                xsupi += dri0 / curv * psupi
+                if is_verbose:
+                    print("  Inner CG solver iteration 0 fell back to steepest descent")
                break
        alphai = dri0 / curv
        xsupi += alphai * psupi
@@ -142,7 +190,11 @@ def _cg(fhess_p, fgrad, maxiter, tol):
        psupi_norm2 = dri1 + betai**2 * psupi_norm2
        i = i + 1
        dri0 = dri1  # update np.dot(ri,ri) for next time.
-
+    if is_verbose and i > maxiter:
+        print(
+            f"  Inner CG solver stopped reaching maxiter={i - 1} with "
+            f"sum(|residuals|) = {np.sum(np.abs(ri))}"
+        )
    return xsupi

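
Note: a minimal sketch of how the new verbose flag of _cg could be exercised on a tiny system; the import path of this private helper and the call below are assumptions for illustration, not part of the diff:

    import numpy as np
    from sklearn.utils.optimize import _cg  # private helper; import path assumed

    H = np.array([[3.0, 1.0], [1.0, 2.0]])  # small SPD "Hessian"
    g = np.array([1.0, 1.0])                # gradient at the current iterate
    # verbose >= 2 prints which stopping condition ended the inner CG loop:
    # small residual sum, tiny curvature, or negative curvature.
    step = _cg(lambda p: H @ p, g, maxiter=10, tol=1e-12, verbose=2)
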
@@ -157,6 +209,7 @@ def _newton_cg(
    maxinner=200,
    line_search=True,
    warn=True,
+    verbose=0,
):
    """
    Minimization of scalar function of one or more variables using the
@@ -210,6 +263,10 @@ def _newton_cg(
    if line_search:
        old_fval = func(x0, *args)
        old_old_fval = None
+    else:
+        old_fval = 0
+
+    is_verbose = verbose > 0

    # Outer loop: our Newton iteration
    while k < maxiter:
@@ -218,7 +275,13 @@ def _newton_cg(
        fgrad, fhess_p = grad_hess(xk, *args)

        absgrad = np.abs(fgrad)
-        if np.max(absgrad) <= tol:
+        max_absgrad = np.max(absgrad)
+        check = max_absgrad <= tol
+        if is_verbose:
+            print(f"Newton-CG iter = {k}")
+            print("  Check Convergence")
+            print(f"    max |gradient| <= tol: {max_absgrad} <= {tol} {check}")
+        if check:
            break

        maggrad = np.sum(absgrad)
@@ -227,14 +290,22 @@ def _newton_cg(

        # Inner loop: solve the Newton update by conjugate gradient, to
        # avoid inverting the Hessian
-        xsupi = _cg(fhess_p, fgrad, maxiter=maxinner, tol=termcond)
+        xsupi = _cg(fhess_p, fgrad, maxiter=maxinner, tol=termcond, verbose=verbose)

        alphak = 1.0

        if line_search:
            try:
                alphak, fc, gc, old_fval, old_old_fval, gfkp1 = _line_search_wolfe12(
-                    func, grad, xk, xsupi, fgrad, old_fval, old_old_fval, args=args
+                    func,
+                    grad,
+                    xk,
+                    xsupi,
+                    fgrad,
+                    old_fval,
+                    old_old_fval,
+                    verbose=verbose,
+                    args=args,
                )
            except _LineSearchError:
                warnings.warn("Line Search failed")
@@ -245,9 +316,14 @@ def _newton_cg(

    if warn and k >= maxiter:
        warnings.warn(
-            "newton-cg failed to converge. Increase the number of iterations.",
+            (
+                f"newton-cg failed to converge at loss = {old_fval}. Increase the"
+                " number of iterations."
+            ),
            ConvergenceWarning,
        )
+    elif is_verbose:
+        print(f"  Solver did converge at loss = {old_fval}.")
    return xk, k

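
Note: an end-to-end sketch of the verbose plumbing through _newton_cg on a convex quadratic. The positional signature (grad_hess, func, grad, x0, ...) and the import path are assumed from the existing helper and are not shown in this diff; verbose=1 enables the outer convergence prints, verbose>=2 additionally the inner CG and line-search diagnostics:

    import numpy as np
    from sklearn.utils.optimize import _newton_cg  # private helper; path assumed

    A = np.array([[3.0, 1.0], [1.0, 2.0]])  # SPD -> strictly convex quadratic

    def func(x):
        return 0.5 * x @ A @ x

    def grad(x):
        return A @ x

    def grad_hess(x):
        # gradient plus a Hessian-vector-product callable, as _cg expects
        return A @ x, lambda p: A @ p

    xk, n_iter = _newton_cg(grad_hess, func, grad, np.array([1.0, 1.0]), verbose=2)
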