10000 wip: numerically integrate vine PDF using quadpy · wgurecky/StarVine@f3d00a0 · GitHub
[go: up one dir, main page]

Skip to content

Commit

Permalink
wip: numerically integrate vine PDF using quadpy
Browse files Browse the repository at this point in the history
  • Loading branch information
wgurecky committed Feb 4, 2020
1 parent aac77de commit f3d00a0
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 17 deletions.
2 changes: 1 addition & 1 deletion starvine/mvar/mv_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def matrixPairPlot(data_in, weights=None, corr_stat="kendalltau", **kwargs):
else:
data = data_in
upper_kde = kwargs.pop("kde", False)
pair_plot = sns.PairGrid(data, palette=["red"], size=kwargs.pop("size", 5))
pair_plot = sns.PairGrid(data, palette=["red"], height=kwargs.pop("size", 5))
# UPPER
if upper_kde:
pair_plot.map_upper(sns.kdeplot, cmap="Blues_d")
Expand Down
42 changes: 28 additions & 14 deletions starvine/vine/C_vine.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def cubesets(K, d, bounds_list=()):
if (K & (K - 1)) or K < 2:
raise ValueError('K must be a positive power of 2. K: %s' % K)

return [set(tuple(p.tolist()) for p in c.reshape(-1, d)) for c in splitcubes(K, d, bounds_list)]
return list([set(tuple(p.tolist()) for p in c.reshape(-1, d)) for c in splitcubes(K, d, bounds_list)])


class Cvine(BaseVine):
Expand Down Expand Up @@ -212,9 +212,20 @@ def _pdf(self, x, **kwargs):
assert isinstance(x, DataFrame)
return np.exp(self._lnpdf(x, **kwargs))

def _cdf(self, x, **kwargs):
def _cdf(self, x, k_split=2, **kwargs):
"""
@brief CDF via numerical integration of PDF.
@param x pandas dataframe of locations at which to evaluate CDF.
@param k_split splits the [0,1]^D dimensional hypercube
over which the copula PDF is supported over into k_split chunks
in each dimension. Ex: a copula of D=3 with k_split=2
would have 8 equally sized cubical integration zones.
"""
# note: quadpy makes use of mypy
import quadpy
assert isinstance(x, DataFrame)
dim = len(x.columns)
scheme = quadpy.ncube.stroud_cn_5_9(dim)
cdf_vector = np.zeros(len(x))
points_vector = np.ones(len(x)) * 0.0
def _pdf(*x):
Expand All @@ -236,7 +247,7 @@ def f_pdf(x_np):
pdf_x[np.isnan(pdf_x)] = np.max(pdf_x)
return pdf_x

edge_tol = 1e-2
edge_tol = 1e-8
for i, x_i in x.iterrows():
ranges = np.zeros((len(x_i), 2)) + edge_tol
col_labels = []
Expand All @@ -246,17 +257,20 @@ def f_pdf(x_np):
ranges[j, 1] = np.clip(col_data, edge_tol, 1. - 0.1 * edge_tol)
j += 1

dim = len(x.columns)
sub_cubes = cubesets(4, dim, ranges)
for sub_cube in sub_cubes:
sub_cube_coords = np.asarray(sub_cube)
import pdb; pdb.set_trace()

scheme = quadpy.ncube.stroud_cn_5_9(dim)
cdf_vector[i] = scheme.integrate(f_pdf,
quadpy.ncube.ncube_points(
*[r for r in ranges])
)
sub_cubes = cubesets(k_split, dim, ranges)
for k, sub_cube in enumerate(sub_cubes):
sub_cube_coords = np.asarray(list(sub_cube))
sub_cube_ranges = np.array([np.min(sub_cube_coords, axis=0), np.max(sub_cube_coords, axis=0)]).T
# print("Integrating subcube %d out of %d" % (int(k), len(sub_cubes)))
cdf_vector[i] += scheme.integrate(f_pdf,
quadpy.ncube.ncube_points(
*[r for r in sub_cube_ranges])
)

# cdf_vector[i] = scheme.integrate(f_pdf,
# quadpy.ncube.ncube_points(
# *[r for r in ranges])
# )

#res = spi.nquad(_pdf, ranges,
# args=(col_labels),
Expand Down
11 changes: 9 additions & 2 deletions tests/test_C_vine_construct.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from scipy.stats import norm, beta
import unittest
import os
from scipy.optimize import minimize
import numpy as np
import pandas as pd
pwd_ = os.path.dirname(os.path.abspath(__file__))
Expand Down Expand Up @@ -70,11 +71,15 @@ def testCvineConstruct(self):
print(tst_rho_matrix - sample_rho_matrix)
self.assertTrue(np.allclose(tst_rho_matrix - sample_rho_matrix, 0, atol=0.10))
self.assertTrue(np.allclose(tst_ktau_matrix - sample_ktau_matrix, 0, atol=0.10))


def opti_wrap(fun, x0, args, disp=0, **kwargs):
return minimize(fun, x0, args=args, method='Nelder-Mead',
tol=1e-12, options={'maxiter': 4000}).x

# fit marginal distributions to original data
marginal_dict = {}
for col_name in tstData.columns:
marginal_dict[col_name] = beta(*beta.fit(tstData[col_name]))
marginal_dict[col_name] = beta(*beta.fit(tstData[col_name], optimizer=opti_wrap))
# scale the samples
c_vine_scaled_samples_a = tstVine.scaleSamples(c_vine_samples, marginal_dict)
matrixPairPlot(c_vine_scaled_samples_a, savefig="vine_varaite_resampled_scaled_a.png")
Expand All @@ -97,11 +102,13 @@ def testCvineConstruct(self):
tstX['4d'] = [0.01, 0.4, 0.5, 0.6, 0.99, 0.999]
tstX['5e'] = [0.01, 0.4, 0.5, 0.6, 0.99, 0.999]
pdf_at_tstX = tstVine.vinePdf(tstX)
print("PDF at: ", tstX)
print(pdf_at_tstX)
self.assertTrue(np.all(pdf_at_tstX > 0.0))

# check vine cdf values
cdf_at_tstX = tstVine.vineCdf(tstX)
print("CDF at: ", tstX)
print(cdf_at_tstX)
cdf_tol = 0.05
self.assertTrue(cdf_at_tstX[1] > cdf_at_tstX[0])
Expand Down

0 comments on commit f3d00a0

Please sign in to comment.
0