@@ -1148,6 +1148,64 @@ The final prediction goes to the largest posterior. This is known as the
1148
1148
'female'
1149
1149
1150
1150
1151
+ Sampling from kernel density estimation
1152
+ ***************************************
1153
+
1154
+ The :func: `kde() ` function creates a continuous probability density
1155
+ function from discrete samples. Some applications need a way to make
1156
+ random selections from that distribution.
1157
+
1158
+ The technique is to pick a sample from a bandwidth scaled kernel
1159
+ function and recenter the result around a randomly chosen point from
1160
+ the input data. This can be done with any kernel that has a known or
1161
+ accurately approximated inverse cumulative distribution function.
1162
+
1163
+ .. testcode ::
1164
+
1165
+ from random import choice, random, seed
1166
+ from math import sqrt, log, pi, tan, asin
1167
+ from statistics import NormalDist
1168
+
1169
+ kernel_invcdfs = {
1170
+ 'normal': NormalDist().inv_cdf,
1171
+ 'logistic': lambda p: log(p / (1 - p)),
1172
+ 'sigmoid': lambda p: log(tan(p * pi/2)),
1173
+ 'rectangular': lambda p: 2*p - 1,
1174
+ 'triangular': lambda p: sqrt(2*p) - 1 if p < 0.5 else 1 - sqrt(2 - 2*p),
1175
+ 'cosine': lambda p: 2*asin(2*p - 1)/pi,
1176
+ }
1177
+
1178
+ def kde_random(data, h, kernel='normal'):
1179
+ 'Return a function that samples from kde() smoothed data.'
1180
+ kernel_invcdf = kernel_invcdfs[kernel]
1181
+ def rand():
1182
+ return h * kernel_invcdf(random()) + choice(data)
1183
+ return rand
1184
+
1185
+ For example:
1186
+
1187
+ .. doctest ::
1188
+
1189
+ >>> discrete_samples = [- 2.1 , - 1.3 , - 0.4 , 1.9 , 5.1 , 6.2 ]
1190
+ >>> rand = kde_random(discrete_samples, h = 1.5 )
1191
+ >>> seed(8675309 )
1192
+ >>> selections = [rand() for i in range (10 )]
1193
+ >>> [round (x, 1 ) for x in selections]
1194
+ [4.7, 7.4, 1.2, 7.8, 6.9, -1.3, 5.8, 0.2, -1.4, 5.7]
1195
+
1196
+ .. testcode ::
1197
+ :hide:
1198
+
1199
+ from statistics import kde
1200
+ from math import isclose
1201
+
1202
+ # Verify that cdf / invcdf will round trip
1203
+ xarr = [i/100 for i in range(-100, 101)]
1204
+ for kernel, invcdf in kernel_invcdfs.items():
1205
+ cdf = kde([0.0], h=1.0, kernel=kernel, cumulative=True)
1206
+ for x in xarr:
1207
+ assert isclose(invcdf(cdf(x)), x, abs_tol=1E-9)
1208
+
1151
1209
..
1152
1210
# This modelines must appear within the last ten lines of the file.
1153
1211
kate: indent-width 3; remove-trailing-space on; replace-tabs on; encoding utf-8;
0 commit comments