|
66 | 66 | import numpy as np
|
67 | 67 |
|
68 | 68 | from matplotlib import pyplot as plt
|
69 |
| - |
| 69 | +from sklearn.datasets import fetch_openml |
70 | 70 | from sklearn.gaussian_process import GaussianProcessRegressor
|
71 | 71 | from sklearn.gaussian_process.kernels \
|
72 | 72 | import RBF, WhiteKernel, RationalQuadratic, ExpSineSquared
|
|
79 | 79 | print(__doc__)
|
80 | 80 |
|
81 | 81 |
|
def _average_consecutive_months(month_float, ppmvs):
    """Average measurements that share the same (consecutive) month value.

    Parameters
    ----------
    month_float : iterable of float
        Fractional-year timestamp of each measurement.
    ppmvs : iterable of float
        Measurement paired with each timestamp.

    Returns
    -------
    months : ndarray of shape (n_months, 1)
        One entry per run of equal consecutive timestamps.
    avg_ppmvs : ndarray of shape (n_months,)
        Mean of the measurements within each run.
    """
    months = []
    ppmv_sums = []
    counts = []

    for month, ppmv in zip(month_float, ppmvs):
        if not months or month != months[-1]:
            # first measurement of a new month: open a new bucket
            months.append(month)
            ppmv_sums.append(ppmv)
            counts.append(1)
        else:
            # aggregate monthly sum to produce average
            ppmv_sums[-1] += ppmv
            counts[-1] += 1

    months = np.asarray(months).reshape(-1, 1)
    avg_ppmvs = np.asarray(ppmv_sums) / counts
    return months, avg_ppmvs


def load_mauna_loa_atmospheric_co2():
    """Load OpenML dataset 41187 and return monthly averaged measurements.

    Returns
    -------
    months : ndarray of shape (n_months, 1)
        Timestamps computed as ``y + (m - 1) / 12`` from the first two
        feature columns (assumed to be year and month -- confirm against
        the OpenML dataset description).
    avg_ppmvs : ndarray of shape (n_months,)
        Target values averaged over measurements sharing a timestamp.
    """
    ml_data = fetch_openml(data_id=41187)

    # Depending on the scikit-learn version, ``data``/``target`` may come
    # back as NumPy arrays or as pandas objects; a DataFrame does not
    # support positional ``[:, 0]`` slicing, so coerce to arrays first.
    data = np.asarray(ml_data.data)
    y = data[:, 0].astype(float)
    m = data[:, 1].astype(float)
    month_float = y + (m - 1) / 12
    ppmvs = np.asarray(ml_data.target, dtype=float)

    return _average_consecutive_months(month_float, ppmvs)
|
110 | 106 |
|
111 | 107 |
|
112 |
| -X, y = load_mauna_loa_atmospheric_c02() |
| 108 | +X, y = load_mauna_loa_atmospheric_co2() |
113 | 109 |
|
114 | 110 | # Kernel with parameters given in GPML book
|
115 | 111 | k1 = 66.0**2 * RBF(length_scale=67.0) # long term smooth rising trend
|
|
0 commit comments