|
95 | 95 | \Big(F_{\mathcal N}\Big(\frac{x_n +\epsilon -\mu_k}{\sigma_k}\Big)-
|
96 | 96 | F_{\mathcal N}\Big(\frac{x_n -\epsilon -\mu_k}{\sigma_k}\Big) \Big)}{P(x_n)}
|
97 | 97 | $$
|
| 98 | + |
| 99 | + |
| 100 | + |
| 101 | +#### Problem 6.28 (Page 51) |
| 102 | + |
| 103 | +(a)将$\mathbb E_{P(x,y)}$记为$\mathbb E$ |
| 104 | +$$ |
| 105 | +\begin{aligned} |
| 106 | +E_{out}(h) &=\mathbb E[(h(x)-y)^2] \\ |
| 107 | +&=\mathbb E[(h(x)-\mathbb E[y|x]+\mathbb E[y|x]-y)^2] \\ |
| 108 | +&=\mathbb E[(h(x)-\mathbb E[y|x])^2]+ \mathbb E[(\mathbb E[y|x]-y)^2] |
| 109 | ++2\mathbb E[(h(x)-\mathbb E[y|x])(\mathbb E[y|x]-y)]\\ |
| 110 | +&=\mathbb E[(h(x)-\mathbb E[y|x])^2]+ \mathbb E[(\mathbb E[y|x]-y)^2] |
| 111 | ++2\mathbb E[\mathbb E [(h(x)-\mathbb E[y|x])(\mathbb E[y|x]-y)]|x]\\ |
| 112 | +&=\mathbb E[(h(x)-\mathbb E[y|x])^2]+ \mathbb E[(\mathbb E[y|x]-y)^2] |
| 113 | ++2\mathbb E[(h(x)-\mathbb E[y|x]) \mathbb E [(\mathbb E[y|x]-y)]|x]\\ |
| 114 | +&=\mathbb E[(h(x)-\mathbb E[y|x])^2]+ \mathbb E[(\mathbb E[y|x]-y)^2] |
| 115 | ++2\mathbb E[(h(x)-\mathbb E[y|x]) \mathbb (\mathbb E[y|x] -\mathbb E[y|x])]\\ |
| 116 | +&=\mathbb E[(h(x)-\mathbb E[y|x])^2]+ \mathbb E[(\mathbb E[y|x]-y)^2]\\ |
| 117 | +&\ge \mathbb E[(h(x)-\mathbb E[y|x])^2] |
| 118 | +\end{aligned} |
| 119 | +$$ |
| 120 | +当且仅当$y=\mathbb E[y|x]$时等式成立。 |
| 121 | + |
| 122 | +(b)首先证明一个引理: |
| 123 | +$$ |
| 124 | +X= \left( |
| 125 | + \begin{matrix} |
| 126 | + X_1 \\ |
| 127 | + X_2 |
| 128 | + \end{matrix} |
| 129 | + \right) |
| 130 | + \sim N_n |
| 131 | + \left( |
| 132 | + \left( |
| 133 | + \begin{matrix} |
| 134 | + \mu_1 \\ |
| 135 | + \mu_2 |
| 136 | + \end{matrix} |
| 137 | + \right) , |
| 138 | + \left( |
| 139 | + \begin{matrix} |
| 140 | + \sum_{11} & \sum_{12} \\ |
| 141 | + \sum_{21} &\sum_{22} |
| 142 | + \end{matrix} |
| 143 | + \right) |
| 144 | + \right) \\ |
| 145 | + X_1 ,\mu _1 \in \mathbb R^k ,{\sum}_{11}为k阶方阵, |
| 146 | + {\sum}_{11},{\sum}_{21}{\sum}_{22}相应矩阵,|{\sum}_{22}| \neq 0\\ |
| 147 | + |
| 148 | + 那么(1)X_1 \sim N_k (\mu_1, {\sum}_{11})\\ |
| 149 | + (2)在X_1= x_1条件下,X_2的条件分布是\\ |
| 150 | + N_{n-k} |
| 151 | + \left( |
| 152 | + \mu_2+{\sum}_{21}{\sum}_{11}^{-1}(x_1 -\mu_1), |
| 153 | + {\sum}_{22} -{\sum}_{21}{\sum}_{11}^{-1}{\sum}_{12} |
| 154 | + \right) |
| 155 | +$$ |
| 156 | +(1)证明: |
| 157 | + |
| 158 | +令 |
| 159 | +$$ |
| 160 | +B = \left( |
| 161 | + \begin{matrix} |
| 162 | + I_k & 0 \\ |
| 163 | + -{\sum}_{21}{\sum}_{11}^{-1} & I_{n-k} |
| 164 | + \end{matrix} |
| 165 | + \right) (该矩阵为分块初等矩阵) |
| 166 | +$$ |
| 167 | +那么 |
| 168 | +$$ |
| 169 | +\begin{aligned} |
| 170 | +B\sum B'&=\left( |
| 171 | + \begin{matrix} |
| 172 | + I_k & 0 \\ |
| 173 | + -{\sum}_{21}{\sum}_{11}^{-1} & I_{n-k} |
| 174 | + \end{matrix} |
| 175 | + \right) |
| 176 | + \left( |
| 177 | + \begin{matrix} |
| 178 | + \sum_{11} & \sum_{12} \\ |
| 179 | + \sum_{21} &\sum_{22} |
| 180 | + \end{matrix} |
| 181 | + \right) |
| 182 | + |
| 183 | + \left( |
| 184 | + \begin{matrix} |
| 185 | + I_k & -{\sum}_{11}^{-1}{\sum}_{12} \\ |
| 186 | + 0& I_{n-k} |
| 187 | + \end{matrix} |
| 188 | + \right) \\ |
| 189 | + &= |
| 190 | + \left( |
| 191 | + \begin{matrix} |
| 192 | + \sum_{11} & \sum_{12} \\ |
| 193 | + 0 &\sum_{22} -{\sum}_{21}{\sum}_{11}^{-1}{\sum}_{12} |
| 194 | + \end{matrix} |
10000
| 195 | + \right) |
| 196 | + \left( |
| 197 | + \begin{matrix} |
| 198 | + I_k & -{\sum}_{11}^{-1}{\sum}_{12} \\ |
| 199 | + 0& I_{n-k} |
| 200 | + \end{matrix} |
| 201 | + \right)\\ |
| 202 | + &=\left( |
| 203 | + \begin{matrix} |
| 204 | + \sum_{11} & 0 \\ |
| 205 | + 0 &\sum_{22} -{\sum}_{21}{\sum}_{11}^{-1}{\sum}_{12} |
| 206 | + \end{matrix} |
| 207 | + \right) |
| 208 | +\end{aligned}\\ |
| 209 | +B\left( |
| 210 | + \begin{matrix} |
| 211 | + \mu_1 \\ |
| 212 | + \mu_2 |
| 213 | + \end{matrix} |
| 214 | + \right) = \left( |
| 215 | + \begin{matrix} |
| 216 | + I_k & 0 \\ |
| 217 | + -{\sum}_{21}{\sum}_{11}^{-1} & I_{n-k} |
| 218 | + \end{matrix} |
| 219 | + \right) \left( |
| 220 | + \begin{matrix} |
| 221 | + \mu_1 \\ |
| 222 | + \mu_2 |
| 223 | + \end{matrix} |
| 224 | + \right) |
| 225 | + = \left( |
| 226 | + \begin{matrix} |
| 227 | + \mu_1 \\ |
| 228 | + \mu_2 -{\sum}_{21}{\sum}_{11}^{-1} \mu_1 |
| 229 | + \end{matrix} |
| 230 | + \right) |
| 231 | +$$ |
| 232 | +设$Y = BX$,那么 |
| 233 | +$$ |
| 234 | +Y= \left( |
| 235 | + \begin{matrix} |
| 236 | + Y_1 \\ |
| 237 | + Y_2 |
| 238 | + \end{matrix} |
| 239 | + \right) =BX =\left( |
| 240 | + \begin{matrix} |
| 241 | + I_k & 0 \\ |
| 242 | + -{\sum}_{21}{\sum}_{11}^{-1} & I_{n-k} |
| 243 | + \end{matrix} |
| 244 | + \right) |
| 245 | + \left( |
| 246 | + \begin{matrix} |
| 247 | + X_1 \\ |
| 248 | + X_2 |
| 249 | + \end{matrix} |
| 250 | + \right) = |
| 251 | + \left( |
| 252 | + \begin{matrix} |
| 253 | + X_1 \\ |
| 254 | + X_2-{\sum}_{21}{\sum}_{11}^{-1}X_1 |
| 255 | + \end{matrix} |
| 256 | + \right) \\ |
| 257 | + B(X-\mu)=\left( |
| 258 | + \begin{matrix} |
| 259 | + X_1-\mu_1 \\ |
| 260 | + X_2-\mu_2-{\sum}_{21}{\sum}_{11}^{-1}(X_1 -\mu_1) |
| 261 | + \end{matrix} |
| 262 | + \right) |
| 263 | +$$ |
| 264 | +因此 |
| 265 | +$$ |
| 266 | +Y= \left( |
| 267 | + \begin{matrix} |
| 268 | + Y_1 \\ |
| 269 | + Y_2 |
| 270 | + \end{matrix} |
| 271 | + \right) |
| 272 | + \sim N_n |
| 273 | + \left( |
| 274 | + \left( |
| 275 | + \begin{matrix} |
| 276 | + \mu_1 \\ |
| 277 | + \mu_2 -{\sum}_{21}{\sum}_{11}^{-1} \mu_1 |
| 278 | + \end{matrix} |
| 279 | + \right) , |
| 280 | + \left( |
| 281 | + \begin{matrix} |
| 282 | + \sum_{11} & 0 \\ |
| 283 | + 0 &\sum_{22} -{\sum}_{21}{\sum}_{11}^{-1}{\sum}_{12} |
| 284 | + \end{matrix} |
| 285 | + \right) |
| 286 | + \right) |
| 287 | +$$ |
| 288 | +不难看出$Y_1,Y_2$独立,从而 |
| 289 | +$$ |
| 290 | +Y_1=X_1 \sim N_k (\mu_1, {\sum}_{11}) |
| 291 | +$$ |
| 292 | +(2)证明:按定义将$X,X_1$的分布写出来 |
| 293 | +$$ |
| 294 | +f_X(x) = \frac{1}{(2\pi)^{\frac n 2 } |\sum|^{\frac 1 2 }} |
| 295 | +\exp(-\frac 1 2 (x-\mu)^T{\sum}^{-1} (x-\mu)) \\ |
| 296 | +f_{X_1}(x_1) = \frac{1}{(2\pi)^{\frac k 2 } |\sum_{11}|^{\frac 1 2 }} |
| 297 | +\exp(-\frac 1 2 (x_1-\mu_1)^T{\sum}_{11}^{-1} (x_1-\mu_1)) |
| 298 | +$$ |
| 299 | +对$(x-\mu)'{\sum}^{-1} (x-\mu)$进行处理,利用上一题的$B$,不难看出$B$为正交矩阵,即$B^TB=BB^T =I$,所以 |
| 300 | +$$ |
| 301 | +\begin{aligned} |
| 302 | +(x-\mu)^T{\sum}^{-1} (x-\mu) |
| 303 | +&= (x-\mu)^TB^T B {\sum}^{-1} B^T B (x-\mu) \\ |
| 304 | +&= (B(x-\mu))^T (B{\sum} B^T)^{-1} (B (x-\mu)) \\ |
| 305 | +&= \left( |
| 306 | + \begin{matrix} |
| 307 | + x_1-\mu_1 \\ |
| 308 | + x_2-\mu_2-{\sum}_{21}{\sum}_{11}^{-1}(x_1 -\mu_1) |
| 309 | + \end{matrix} |
| 310 | + \right)^T\left( |
| 311 | + \begin{matrix} |
| 312 | + \sum_{11} & 0 \\ |
| 313 | + 0 &\sum_{22} -{\sum}_{21}{\sum}_{11}^{-1}{\sum}_{12} |
| 314 | + \end{matrix} |
| 315 | + \right)\left( |
| 316 | + \begin{matrix} |
| 317 | + x_1-\mu_1 \\ |
| 318 | + x_2-\mu_2-{\sum}_{21}{\sum}_{11}^{-1}(x_1 -\mu_1) |
| 319 | + \end{matrix} |
| 320 | + \right) \\ |
| 321 | + &=( x_1-\mu_1 )^T \sum_{11}( x_1-\mu_1 )+ |
| 322 | + (x_2-\mu_2-{\sum}_{21}{\sum}_{11}^{-1}(x_1 -\mu_1))^T |
| 323 | + ( \sum_{22} -{\sum}_{21}{\sum}_{11}^{-1}{\sum}_{12}) |
| 324 | + (x_2-\mu_2-{\sum}_{21}{\sum}_{11}^{-1}(x_1 -\mu_1)) |
| 325 | +\end{aligned} |
| 326 | +$$ |
| 327 | +注意到 |
| 328 | +$$ |
| 329 | +|\sum| =\det (\left( |
| 330 | + \begin{matrix} |
| 331 | + \sum_{11} & 0 \\ |
| 332 | + 0 &\sum_{22} -{\sum}_{21}{\sum}_{11}^{-1}{\sum}_{12} |
| 333 | + \end{matrix} |
| 334 | + \right)) =|\sum_{11}||\sum_{22} -{\sum}_{21}{\sum}_{11}^{-1}{\sum}_{12}| |
| 335 | +$$ |
| 336 | +所以条件概率为 |
| 337 | +$$ |
| 338 | +\begin{aligned} |
| 339 | +f_{X_2|X_1}(x|x_1)&= |
| 340 | +\frac{\frac{1}{(2\pi)^{\frac n 2 } |\sum|^{\frac 1 2 }} |
| 341 | +\exp(( x_1-\mu_1 )^T \sum_{11}( x_1-\mu_1 )+ |
| 342 | + (x_2-\mu_2-{\sum}_{21}{\sum}_{11}^{-1}(x_1 -\mu_1))^T |
| 343 | + ( \sum_{22} -{\sum}_{21}{\sum}_{11}^{-1}{\sum}_{12}) |
| 344 | + (x_2-\mu_2-{\sum}_{21}{\sum}_{11}^{-1}(x_1 -\mu_1)))} |
| 345 | +{\frac{1}{(2\pi)^{\frac k 2 } |\sum_{11}|^{\frac 1 2 }} |
| 346 | +\exp(-\frac 1 2 (x_1-\mu_1)^T{\sum}_{11}^{-1} (x_1-\mu_1))}\\ |
| 347 | +&= \frac{1}{(2\pi)^{\frac {n-k} 2 }|\sum_{22} -{\sum}_{21}{\sum}_{11}^{-1}{\sum}_{12}|^{\frac 1 2 }} |
| 348 | +\exp((x_2-\mu_2-{\sum}_{21}{\sum}_{11}^{-1}(x_1 -\mu_1))^T |
| 349 | + ( {\sum}_{22} -{\sum}_{21}{\sum}_{11}^{-1}{\sum}_{12}) |
| 350 | + (x_2-\mu_2-{\sum}_{21}{\sum}_{11}^{-1}(x_1 -\mu_1))) |
| 351 | +\end{aligned} |
| 352 | +$$ |
| 353 | +即 |
| 354 | +$$ |
| 355 | +f_{X_2|X_1}(x|x_1) \sim N_{n-k} |
| 356 | + \left( |
| 357 | + \mu_2+{\sum}_{21}{\sum}_{11}^{-1}(x_1 -\mu_1), |
| 358 | + {\sum}_{22} -{\sum}_{21}{\sum}_{11}^{-1}{\sum}_{12} |
| 359 | + \right) |
| 360 | +$$ |
| 361 | + |
| 362 | + |
| 363 | + |
| 364 | + |
| 365 | +回到原题,记$Z_k \sim N(\mu_k, S_k^{-1})$,对应的$X,Y$分别记为$X_k,Y_k$那么 |
| 366 | +$$ |
| 367 | +f_Z(z) = \sum_{k=1}^K \frac{w_k |S_k|^{\frac 1 2 }}{(2\pi)^{\frac{d+1}{2}}} |
| 368 | +\exp(-\frac 1 2 (z-\mu_k)^T S_k (z-\mu_k))=\sum_{k=1}^K |
| 369 | +w_k f_{Z_k}(z) |
| 370 | +$$ |
| 371 | +由引理的第一部分可知 |
| 372 | +$$ |
| 373 | +f_X(x) = \sum_{k=1}^K \frac{w_k |A_k|^{\frac 1 2 }}{(2\pi)^{\frac{d}{2}}} |
| 374 | +\exp(-\frac 1 2 (x-\alpha_k)^T A_k (x-\alpha_k)) |
| 375 | += \sum_{k=1}^K w_k f_{X_k}(x) |
| 376 | +$$ |
| 377 | +所以 |
| 378 | +$$ |
| 379 | +f_{Y|X}(y|x) =\frac{\sum_{k=1}^K w_k f_{X_k}(x)f_{Y_k|X_k}(y|x)}{f_X(x)} |
| 380 | +$$ |
| 381 | +由引理的第二部分可知 |
| 382 | +$$ |
| 383 | +f_{Y_k|X_k}(y|x) \sim N(\beta_k+\frac 1 {c_k}b_k^T(x-\mu_k) , S_k^* )\\ |
| 384 | +S_k^*可由引理计算出来 |
| 385 | +$$ |
| 386 | +对上式取期望可得 |
| 387 | +$$ |
| 388 | +\mathbb E_{Y_k|X_k}[y|x] = \beta_k+\frac 1 {c_k}b_k^T(x-\mu_k) |
| 389 | +$$ |
| 390 | +对整体取期望可得 |
| 391 | +$$ |
| 392 | +\begin{aligned} |
| 393 | +g(x) &=\mathbb E[y|x]\\ |
| 394 | +&= \frac{\sum_{k=1}^K \frac{w_k |A_k|^{\frac 1 2 }}{(2\pi)^{\frac{d}{2}}} |
| 395 | +\exp(-\frac 1 2 (x-\alpha_k)^T A_k (x-\alpha_k))(\beta_k+\frac 1 {c_k}b_k^T(x-\mu_k))}{\sum_{k=1}^K \frac{w_k |A_k|^{\frac 1 2 }}{(2\pi)^{\frac{d}{2}}} |
| 396 | +\exp(-\frac 1 2 (x-\alpha_k)^T A_k (x-\alpha_k)} \\ |
| 397 | +&=\frac{\sum_{k=1}^K {w_k |A_k|^{\frac 1 2 }} |
| 398 | +\exp(-\frac 1 2 (x-\alpha_k)^T A_k (x-\alpha_k))(\beta_k+\frac 1 {c_k}b_k^T(x-\mu_k))}{\sum_{k=1}^K {w_k |A_k|^{\frac 1 2 }} |
| 399 | +\exp(-\frac 1 2 (x-\alpha_k)^T A_k (x-\alpha_k))} |
| 400 | +\end{aligned} |
| 401 | +$$ |
| 402 | + |
| 403 | + |
| 404 | +(c)此时为(b)的特殊情形 |
| 405 | +$$ |
| 406 | +K= N,w_k =\frac 1 K ,\alpha_k= x_k,\beta_k =y_k,S_k =r^2 I |
| 407 | +$$ |
| 408 | +带入上式可得 |
| 409 | +$$ |
| 410 | +\begin{aligned} |
| 411 | +g(x) &=\mathbb E[y|x]\\ |
| 412 | +&=\frac{\sum_{n=1}^N \exp({-\frac 1 {2r^2}||x-x_n||^2 })y_n}{\sum_{n=1}^N \exp({-\frac 1 {2r^2}||x-x_n||^2 })} |
| 413 | +\end{aligned} |
| 414 | +$$ |
0 commit comments