| 1 | /* -*- mode: c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
| 2 | |
| 3 | /* |
| 4 | Copyright (C) 2003 Ferdinando Ametrano |
| 5 | Copyright (C) 2003 RiskMap srl |
| 6 | Copyright (C) 2005 Gary Kennedy |
| 7 | Copyright (C) 2015 Peter Caspers |
| 8 | |
| 9 | This file is part of QuantLib, a free-software/open-source library |
| 10 | for financial quantitative analysts and developers - http://quantlib.org/ |
| 11 | |
| 12 | QuantLib is free software: you can redistribute it and/or modify it |
| 13 | under the terms of the QuantLib license. You should have received a |
| 14 | copy of the license along with this program; if not, please email |
| 15 | <quantlib-dev@lists.sf.net>. The license is also available online at |
| 16 | <http://quantlib.org/license.shtml>. |
| 17 | |
| 18 | This program is distributed in the hope that it will be useful, but WITHOUT |
| 19 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 20 | FOR A PARTICULAR PURPOSE. See the license for more details. |
| 21 | */ |
| 22 | |
| 23 | #include "stats.hpp" |
| 24 | #include "utilities.hpp" |
| 25 | #include <ql/math/statistics/statistics.hpp> |
| 26 | #include <ql/math/statistics/incrementalstatistics.hpp> |
| 27 | #include <ql/math/statistics/gaussianstatistics.hpp> |
| 28 | #include <ql/math/statistics/sequencestatistics.hpp> |
| 29 | #include <ql/math/statistics/convergencestatistics.hpp> |
| 30 | #include <ql/math/randomnumbers/mt19937uniformrng.hpp> |
| 31 | #include <ql/math/randomnumbers/inversecumulativerng.hpp> |
| 32 | #include <ql/math/distributions/normaldistribution.hpp> |
| 33 | #include <ql/math/comparison.hpp> |
| 34 | #include <ql/utilities/dataformatters.hpp> |
| 35 | |
| 36 | using namespace QuantLib; |
| 37 | using namespace boost::unit_test_framework; |
| 38 | |
| 39 | namespace { |
| 40 | |
| 41 | Real data[] = { 3.0, 4.0, 5.0, 2.0, 3.0, 4.0, 5.0, 6.0, 4.0, 7.0 }; |
| 42 | Real weights[] = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 }; |
| 43 | |
| 44 | template <class S> |
| 45 | void check(const std::string& name) { |
| 46 | |
| 47 | S s; |
| 48 | for (Size i=0; i<LENGTH(data); i++) |
| 49 | s.add(data[i],weights[i]); |
| 50 | |
| 51 | Real calculated, expected; |
| 52 | Real tolerance; |
| 53 | |
| 54 | if (s.samples() != LENGTH(data)) |
| 55 | BOOST_FAIL(name << ": wrong number of samples\n" |
| 56 | << " calculated: " << s.samples() << "\n" |
| 57 | << " expected: " << LENGTH(data)); |
| 58 | |
| 59 | expected = std::accumulate(first: weights,last: weights+LENGTH(weights),init: Real(0.0)); |
| 60 | calculated = s.weightSum(); |
| 61 | if (calculated != expected) |
| 62 | BOOST_FAIL(name << ": wrong sum of weights\n" |
| 63 | << " calculated: " << calculated << "\n" |
| 64 | << " expected: " << expected); |
| 65 | |
| 66 | expected = *std::min_element(first: data,last: data+LENGTH(data)); |
| 67 | calculated = s.min(); |
| 68 | if (calculated != expected) |
| 69 | BOOST_FAIL(name << ": wrong minimum value\n" |
| 70 | << " calculated: " << calculated << "\n" |
| 71 | << " expected: " << expected); |
| 72 | |
| 73 | expected = *std::max_element(first: data,last: data+LENGTH(data)); |
| 74 | calculated = s.max(); |
| 75 | if (calculated != expected) |
| 76 | BOOST_FAIL(name << ": wrong maximum value\n" |
| 77 | << " calculated: " << calculated << "\n" |
| 78 | << " expected: " << expected); |
| 79 | |
| 80 | expected = 4.3; |
| 81 | tolerance = 1.0e-9; |
| 82 | calculated = s.mean(); |
| 83 | if (std::fabs(x: calculated-expected) > tolerance) |
| 84 | BOOST_FAIL(name << ": wrong mean value\n" |
| 85 | << " calculated: " << calculated << "\n" |
| 86 | << " expected: " << expected); |
| 87 | |
| 88 | expected = 2.23333333333; |
| 89 | calculated = s.variance(); |
| 90 | if (std::fabs(x: calculated-expected) > tolerance) |
| 91 | BOOST_FAIL(name << ": wrong variance\n" |
| 92 | << " calculated: " << calculated << "\n" |
| 93 | << " expected: " << expected); |
| 94 | |
| 95 | expected = 1.4944341181; |
| 96 | calculated = s.standardDeviation(); |
| 97 | if (std::fabs(x: calculated-expected) > tolerance) |
| 98 | BOOST_FAIL(name << ": wrong standard deviation\n" |
| 99 | << " calculated: " << calculated << "\n" |
| 100 | << " expected: " << expected); |
| 101 | |
| 102 | expected = 0.359543071407; |
| 103 | calculated = s.skewness(); |
| 104 | if (std::fabs(x: calculated-expected) > tolerance) |
| 105 | BOOST_FAIL(name << ": wrong skewness\n" |
| 106 | << " calculated: " << calculated << "\n" |
| 107 | << " expected: " << expected); |
| 108 | |
| 109 | expected = -0.151799637209; |
| 110 | calculated = s.kurtosis(); |
| 111 | if (std::fabs(x: calculated-expected) > tolerance) |
| 112 | BOOST_FAIL(name << ": wrong kurtosis\n" |
| 113 | << " calculated: " << calculated << "\n" |
| 114 | << " expected: " << expected); |
| 115 | } |
| 116 | |
| 117 | } |
| 118 | |
| 119 | |
| 120 | void StatisticsTest::testStatistics() { |
| 121 | |
| 122 | BOOST_TEST_MESSAGE("Testing statistics..." ); |
| 123 | |
| 124 | check<IncrementalStatistics>( |
| 125 | name: std::string("IncrementalStatistics" )); |
| 126 | check<Statistics>(name: std::string("Statistics" )); |
| 127 | } |
| 128 | |
| 129 | |
| 130 | namespace { |
| 131 | |
| 132 | template <class S> |
| 133 | void checkSequence(const std::string& name, Size dimension) { |
| 134 | |
| 135 | GenericSequenceStatistics<S> ss(dimension); |
| 136 | Size i; |
| 137 | for (i = 0; i<LENGTH(data); i++) { |
| 138 | std::vector<Real> temp(dimension, data[i]); |
| 139 | ss.add(temp, weights[i]); |
| 140 | } |
| 141 | |
| 142 | std::vector<Real> calculated; |
| 143 | Real expected, tolerance; |
| 144 | |
| 145 | if (ss.samples() != LENGTH(data)) |
| 146 | BOOST_FAIL("SequenceStatistics<" << name << ">: " |
| 147 | << "wrong number of samples\n" |
| 148 | << " calculated: " << ss.samples() << "\n" |
| 149 | << " expected: " << LENGTH(data)); |
| 150 | |
| 151 | expected = std::accumulate(first: weights,last: weights+LENGTH(weights),init: Real(0.0)); |
| 152 | if (ss.weightSum() != expected) |
| 153 | BOOST_FAIL("SequenceStatistics<" << name << ">: " |
| 154 | << "wrong sum of weights\n" |
| 155 | << " calculated: " << ss.weightSum() << "\n" |
| 156 | << " expected: " << expected); |
| 157 | |
| 158 | expected = *std::min_element(first: data,last: data+LENGTH(data)); |
| 159 | calculated = ss.min(); |
| 160 | for (i=0; i<dimension; i++) { |
| 161 | if (calculated[i] != expected) |
| 162 | BOOST_FAIL("SequenceStatistics<" << name << ">: " |
| 163 | << io::ordinal(i+1) << " dimension: " |
| 164 | << "wrong minimum value\n" |
| 165 | << " calculated: " << calculated[i] << "\n" |
| 166 | << " expected: " << expected); |
| 167 | } |
| 168 | |
| 169 | expected = *std::max_element(first: data,last: data+LENGTH(data)); |
| 170 | calculated = ss.max(); |
| 171 | for (i=0; i<dimension; i++) { |
| 172 | if (calculated[i] != expected) |
| 173 | BOOST_FAIL("SequenceStatistics<" << name << ">: " |
| 174 | << io::ordinal(i+1) << " dimension: " |
| 175 | << "wrong maximun value\n" |
| 176 | << " calculated: " << calculated[i] << "\n" |
| 177 | << " expected: " << expected); |
| 178 | } |
| 179 | |
| 180 | expected = 4.3; |
| 181 | tolerance = 1.0e-9; |
| 182 | calculated = ss.mean(); |
| 183 | for (i=0; i<dimension; i++) { |
| 184 | if (std::fabs(x: calculated[i]-expected) > tolerance) |
| 185 | BOOST_FAIL("SequenceStatistics<" << name << ">: " |
| 186 | << io::ordinal(i+1) << " dimension: " |
| 187 | << "wrong mean value\n" |
| 188 | << " calculated: " << calculated[i] << "\n" |
| 189 | << " expected: " << expected); |
| 190 | } |
| 191 | |
| 192 | expected = 2.23333333333; |
| 193 | calculated = ss.variance(); |
| 194 | for (i=0; i<dimension; i++) { |
| 195 | if (std::fabs(x: calculated[i]-expected) > tolerance) |
| 196 | BOOST_FAIL("SequenceStatistics<" << name << ">: " |
| 197 | << io::ordinal(i+1) << " dimension: " |
| 198 | << "wrong variance\n" |
| 199 | << " calculated: " << calculated[i] << "\n" |
| 200 | << " expected: " << expected); |
| 201 | } |
| 202 | |
| 203 | expected = 1.4944341181; |
| 204 | calculated = ss.standardDeviation(); |
| 205 | for (i=0; i<dimension; i++) { |
| 206 | if (std::fabs(x: calculated[i]-expected) > tolerance) |
| 207 | BOOST_FAIL("SequenceStatistics<" << name << ">: " |
| 208 | << io::ordinal(i+1) << " dimension: " |
| 209 | << "wrong standard deviation\n" |
| 210 | << " calculated: " << calculated[i] << "\n" |
| 211 | << " expected: " << expected); |
| 212 | } |
| 213 | |
| 214 | expected = 0.359543071407; |
| 215 | calculated = ss.skewness(); |
| 216 | for (i=0; i<dimension; i++) { |
| 217 | if (std::fabs(x: calculated[i]-expected) > tolerance) |
| 218 | BOOST_FAIL("SequenceStatistics<" << name << ">: " |
| 219 | << io::ordinal(i+1) << " dimension: " |
| 220 | << "wrong skewness\n" |
| 221 | << " calculated: " << calculated[i] << "\n" |
| 222 | << " expected: " << expected); |
| 223 | } |
| 224 | |
| 225 | expected = -0.151799637209; |
| 226 | calculated = ss.kurtosis(); |
| 227 | for (i=0; i<dimension; i++) { |
| 228 | if (std::fabs(x: calculated[i]-expected) > tolerance) |
| 229 | BOOST_FAIL("SequenceStatistics<" << name << ">: " |
| 230 | << io::ordinal(i+1) << " dimension: " |
| 231 | << "wrong kurtosis\n" |
| 232 | << " calculated: " << calculated[i] << "\n" |
| 233 | << " expected: " << expected); |
| 234 | } |
| 235 | } |
| 236 | |
| 237 | } |
| 238 | |
| 239 | |
| 240 | void StatisticsTest::testSequenceStatistics() { |
| 241 | |
| 242 | BOOST_TEST_MESSAGE("Testing sequence statistics..." ); |
| 243 | |
| 244 | checkSequence<IncrementalStatistics>( |
| 245 | name: std::string("IncrementalStatistics" ),dimension: 5); |
| 246 | checkSequence<Statistics>(name: std::string("Statistics" ),dimension: 5); |
| 247 | } |
| 248 | |
| 249 | |
| 250 | namespace { |
| 251 | |
| 252 | template <class S> |
| 253 | void checkConvergence(const std::string& name) { |
| 254 | |
| 255 | ConvergenceStatistics<S> stats; |
| 256 | |
| 257 | stats.add(1.0); |
| 258 | stats.add(2.0); |
| 259 | stats.add(3.0); |
| 260 | stats.add(4.0); |
| 261 | stats.add(5.0); |
| 262 | stats.add(6.0); |
| 263 | stats.add(7.0); |
| 264 | stats.add(8.0); |
| 265 | |
| 266 | const Size expectedSize1 = 3; |
| 267 | Size calculatedSize = stats.convergenceTable().size(); |
| 268 | if (calculatedSize != expectedSize1) |
| 269 | BOOST_FAIL("ConvergenceStatistics<" << name << ">: " |
| 270 | << "\nwrong convergence-table size" |
| 271 | << "\n calculated: " << calculatedSize |
| 272 | << "\n expected: " << expectedSize1); |
| 273 | |
| 274 | const Real expectedValue1 = 4.0; |
| 275 | const Real tolerance = 1.0e-9; |
| 276 | Real calculatedValue = stats.convergenceTable().back().second; |
| 277 | if (std::fabs(x: calculatedValue-expectedValue1) > tolerance) |
| 278 | BOOST_FAIL("wrong last value in convergence table" |
| 279 | << "\n calculated: " << calculatedValue |
| 280 | << "\n expected: " << expectedValue1); |
| 281 | |
| 282 | const Size expectedSampleSize1 = 7; |
| 283 | Size calculatedSamples = stats.convergenceTable().back().first; |
| 284 | if (calculatedSamples != expectedSampleSize1) |
| 285 | BOOST_FAIL("wrong number of samples in convergence table" |
| 286 | << "\n calculated: " << calculatedSamples |
| 287 | << "\n expected: " << expectedSampleSize1); |
| 288 | |
| 289 | stats.reset(); |
| 290 | stats.add(1.0); |
| 291 | stats.add(2.0); |
| 292 | stats.add(3.0); |
| 293 | stats.add(4.0); |
| 294 | |
| 295 | const Size expectedSize2 = 2; |
| 296 | calculatedSize = stats.convergenceTable().size(); |
| 297 | if (calculatedSize != expectedSize2) |
| 298 | BOOST_FAIL("wrong convergence-table size" |
| 299 | << "\n calculated: " << calculatedSize |
| 300 | << "\n expected: " << expectedSize2); |
| 301 | |
| 302 | const Real expectedValue2 = 2.0; |
| 303 | calculatedValue = stats.convergenceTable().back().second; |
| 304 | if (std::fabs(x: calculatedValue-expectedValue2) > tolerance) |
| 305 | BOOST_FAIL("wrong last value in convergence table" |
| 306 | << "\n calculated: " << calculatedValue |
| 307 | << "\n expected: " << expectedValue2); |
| 308 | |
| 309 | const Size expectedSampleSize2 = 3; |
| 310 | calculatedSamples = stats.convergenceTable().back().first; |
| 311 | if (calculatedSamples != expectedSampleSize2) |
| 312 | BOOST_FAIL("wrong number of samples in convergence table" |
| 313 | << "\n calculated: " << calculatedSamples |
| 314 | << "\n expected: " << expectedSampleSize2); |
| 315 | } |
| 316 | |
| 317 | } |
| 318 | |
| 319 | |
| 320 | void StatisticsTest::testConvergenceStatistics() { |
| 321 | |
| 322 | BOOST_TEST_MESSAGE("Testing convergence statistics..." ); |
| 323 | |
| 324 | checkConvergence<IncrementalStatistics>( |
| 325 | name: std::string("IncrementalStatistics" )); |
| 326 | checkConvergence<Statistics>(name: std::string("Statistics" )); |
| 327 | } |
| 328 | |
| 329 | #define TEST_INC_STAT(expr, expected) \ |
| 330 | if (!close_enough(expr, expected)) \ |
| 331 | BOOST_ERROR(std::setprecision(16) \ |
| 332 | << std::scientific << #expr << " (" << expr \ |
| 333 | << ") can not be reproduced against cached result (" \ |
| 334 | << expected << ")"); |
| 335 | |
| 336 | void StatisticsTest::testIncrementalStatistics() { |
| 337 | |
| 338 | BOOST_TEST_MESSAGE("Testing incremental statistics..." ); |
| 339 | |
| 340 | // With QuantLib 1.7 IncrementalStatistics was changed to |
| 341 | // a wrapper to the boost accumulator library. This is |
| 342 | // a test of the new implementation against results from |
| 343 | // the old one. |
| 344 | |
| 345 | MersenneTwisterUniformRng mt(42); |
| 346 | |
| 347 | IncrementalStatistics stat; |
| 348 | |
| 349 | for (Size i = 0; i < 500000; ++i) { |
| 350 | Real x = 2.0 * (mt.nextReal() - 0.5) * 1234.0; |
| 351 | Real w = mt.nextReal(); |
| 352 | stat.add(value: x, weight: w); |
| 353 | } |
| 354 | |
| 355 | if (stat.samples() != 500000) |
| 356 | BOOST_ERROR("stat.samples() (" << stat.samples() |
| 357 | << ") can not be reproduced against cached result (" |
| 358 | << 500000 << ")" ); |
| 359 | TEST_INC_STAT(stat.weightSum(), 2.5003623600676749e+05); |
| 360 | TEST_INC_STAT(stat.mean(), 4.9122325964293845e-01); |
| 361 | TEST_INC_STAT(stat.variance(), 5.0706503959683329e+05); |
| 362 | TEST_INC_STAT(stat.standardDeviation(), 7.1208499464378076e+02); |
| 363 | TEST_INC_STAT(stat.errorEstimate(), 1.0070402569876076e+00); |
| 364 | TEST_INC_STAT(stat.skewness(), -1.7360169326720038e-03); |
| 365 | TEST_INC_STAT(stat.kurtosis(), -1.1990742562085395e+00); |
| 366 | TEST_INC_STAT(stat.min(), -1.2339945045639761e+03); |
| 367 | TEST_INC_STAT(stat.max(), 1.2339958308008499e+03); |
| 368 | TEST_INC_STAT(stat.downsideVariance(), 5.0786776146975247e+05); |
| 369 | TEST_INC_STAT(stat.downsideDeviation(), 7.1264841364431061e+02); |
| 370 | |
| 371 | // This is a test for numerical stability, |
| 372 | // where the old implementation fails |
| 373 | |
| 374 | InverseCumulativeRng<MersenneTwisterUniformRng,InverseCumulativeNormal> normal_gen(mt); |
| 375 | |
| 376 | IncrementalStatistics stat2; |
| 377 | |
| 378 | for (Size i = 0; i < 500000; ++i) { |
| 379 | Real x = normal_gen.next().value * 1E-1 + 1E8; |
| 380 | Real w = 1.0; |
| 381 | stat2.add(value: x, weight: w); |
| 382 | } |
| 383 | |
| 384 | Real tol = 1E-5; |
| 385 | |
| 386 | if(std::fabs( x: stat2.variance() - 1E-2 ) > tol) |
| 387 | BOOST_ERROR("variance (" << stat2.variance() |
| 388 | << ") out of expected range " << 1E-2 << " +- " |
| 389 | << tol); |
| 390 | } |
| 391 | |
| 392 | test_suite* StatisticsTest::suite() { |
| 393 | auto* suite = BOOST_TEST_SUITE("Statistics tests" ); |
| 394 | suite->add(QUANTLIB_TEST_CASE(&StatisticsTest::testStatistics)); |
| 395 | suite->add(QUANTLIB_TEST_CASE(&StatisticsTest::testSequenceStatistics)); |
| 396 | suite->add(QUANTLIB_TEST_CASE(&StatisticsTest::testConvergenceStatistics)); |
| 397 | suite->add(QUANTLIB_TEST_CASE(&StatisticsTest::testIncrementalStatistics)); |
| 398 | return suite; |
| 399 | } |
| 400 | |