|
66 | 66 | import numpy as np |
67 | 67 |
|
68 | 68 | from matplotlib import pyplot as plt |
69 | | - |
| 69 | +from sklearn.datasets import fetch_openml |
70 | 70 | from sklearn.gaussian_process import GaussianProcessRegressor |
71 | 71 | from sklearn.gaussian_process.kernels \ |
72 | 72 | import RBF, WhiteKernel, RationalQuadratic, ExpSineSquared |
|
79 | 79 | print(__doc__) |
80 | 80 |
|
81 | 81 |
|
def load_mauna_loa_atmospheric_co2():
    """Load the Mauna Loa CO2 record from OpenML as monthly averages.

    Fetches OpenML dataset 41187, converts its first two feature columns
    (read here as year and month number) into a fractional-year time stamp
    and averages all target values of consecutive samples that fall in the
    same month.

    Returns
    -------
    months : ndarray of shape (n_months, 1)
        Time stamps computed as ``year + (month - 1) / 12``.
    avg_ppmvs : ndarray of shape (n_months,)
        Mean of the target values recorded for the corresponding month.
    """
    ml_data = fetch_openml(data_id=41187)

    # Depending on the scikit-learn version, fetch_openml may return pandas
    # objects rather than NumPy arrays, and those do not support ``[:, 0]``
    # positional indexing. np.asarray normalises both cases.
    data = np.asarray(ml_data.data)
    years = data[:, 0].astype(float)
    month_numbers = data[:, 1].astype(float)
    month_float = years + (month_numbers - 1) / 12
    ppmvs = np.asarray(ml_data.target, dtype=float)

    months = []
    ppmv_sums = []
    counts = []
    for month, ppmv in zip(month_float, ppmvs):
        if not months or month != months[-1]:
            # first sample of a new month starts a new bucket
            months.append(month)
            ppmv_sums.append(ppmv)
            counts.append(1)
        else:
            # aggregate monthly sum to produce average
            ppmv_sums[-1] += ppmv
            counts[-1] += 1

    months = np.asarray(months).reshape(-1, 1)
    avg_ppmvs = np.asarray(ppmv_sums) / counts
    return months, avg_ppmvs
110 | 106 |
|
111 | 107 |
|
112 | | -X, y = load_mauna_loa_atmospheric_c02() |
| 108 | +X, y = load_mauna_loa_atmospheric_co2() |
113 | 109 |
|
114 | 110 | # Kernel with parameters given in GPML book |
115 | 111 | k1 = 66.0**2 * RBF(length_scale=67.0) # long term smooth rising trend |
|
0 commit comments