首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >如何计算核密度函数的概率(例如5%,10%,90%)?

如何计算核密度函数的概率(例如5%,10%,90%)?
EN

Stack Overflow用户
提问于 2022-07-11 04:54:53
回答 1查看 173关注 0票数 2

我试图在python中绘制两个数据集的核密度分布(高斯核)曲线以及直方图。

但是,在我的脚本中,估计95%(data1:用红色垂直线标记)和5%(data2:用黑色垂直线标记)的位置非常耗时:我必须在代码中反复尝试不同的积分上限(代码注释中有详细说明),直到核密度曲线下的累积概率达到95%或5%。

希望有人在这里帮助我,并提出解决这个问题的可能方法,或者用另一种方法来绘制内核密度曲线,以及95%和5%的概率。

谢谢!

我的脚本如下。

代码语言:python
复制
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.neighbors import KernelDensity
from scipy import stats
# NOTE(review): `result` is not defined anywhere in this snippet; it is
# presumably a table (e.g. a pandas DataFrame) loaded earlier -- confirm.
data1 = result['95_24'] # data 1: column '95_24'
data2 = result['5_24'] # data 2: column '5_24'
def plot_prob_density(data1, data2, x_start1, x_end1):
    """Plot histograms and Gaussian KDE curves for two data sets.

    Both data sets are drawn on a single axis limited to [-20, 20]: a
    normalised histogram and a Gaussian kernel density estimate
    (bandwidth 1.8) for each, plus two dashed vertical marker lines.

    Args:
        data1: Samples for the first data set (drawn in red). Passed
            directly to ``KernelDensity.fit``, so it must already be
            2-D with shape ``(n_samples, 1)``.
        data2: Samples for the second data set (drawn in black), same
            layout as ``data1``.
        x_start1: x position of the black dashed line (labelled 5%).
        x_end1: x position of the red dashed line (labelled 95%).

    Returns:
        A tuple ``(kd_data1, kd_data2)`` with the two fitted
        ``KernelDensity`` estimators.
    """
    # Imported locally because the module-level imports do not include gc.
    import gc

    fig, ax1 = plt.subplots(1, 1, figsize=(6, 5), sharey=False)

    # Evaluation grid as a column vector, the layout score_samples expects.
    x = np.linspace(-20, 20, 1000)[:, np.newaxis]
    bins = np.linspace(-20, 20, 40)

    # Histogram plot of the data
    ax1.hist(data1, bins=bins, density=True, color='r', alpha=0.4)
    ax1.hist(data2, bins=bins, density=True, color='k', alpha=0.4)

    # Kernel density estimation
    kd_data1 = KernelDensity(kernel='gaussian', bandwidth=1.8).fit(data1)
    kd_data2 = KernelDensity(kernel='gaussian', bandwidth=1.8).fit(data2)

    # score_samples returns log-densities; exponentiate to get the PDF.
    kd_vals_data1 = np.exp(kd_data1.score_samples(x))
    kd_vals_data2 = np.exp(kd_data2.score_samples(x))

    # Density plot
    ax1.plot(x, kd_vals_data1, color='r', label='$Na$', linewidth=2)
    ax1.plot(x, kd_vals_data2, color='k', label='$Λ$', linewidth=2)

    # Vertical markers at the caller-supplied limits. Raw strings keep the
    # literal backslash in '\%' without an invalid-escape warning.
    ax1.axvline(x=x_end1, color='red', linestyle='dashed', linewidth=3,
                label=r'$β_{95\%}$')
    ax1.axvline(x=x_start1, color='k', linestyle='dashed', linewidth=3,
                label=r'$β_{5\%}$')

    # Axis cosmetics
    ax1.set_ylabel('Probability density', fontsize=12)
    ax1.set_xlabel('Beta', fontsize=12)
    ax1.set_xlim([-20, 20])
    ax1.set_ylim(0, 0.3)
    ax1.set_yticks([0, 0.1, 0.2, 0.3])
    ax1.set_xticks([-20, 20, -10, 10, 0])

    ax1.legend(fontsize=12, loc='upper left', frameon=False)

    fig.tight_layout()
    gc.collect()
    return kd_data1, kd_data2

# Calculation of 95% and 5 % for data1 and data2 Kernel density curve


def get_probability(start_value, end_value, eval_points, kd):
    """Approximate the probability mass of a KDE on an interval.

    The density is sampled on ``eval_points`` equally spaced points
    between ``start_value`` and ``end_value`` (both included) and
    integrated with the trapezoidal rule.

    Args:
        start_value: Lower integration limit.
        end_value: Upper integration limit.
        eval_points: Number of evaluation points; must be at least 2,
            because the step size divides by ``eval_points - 1``.
        kd: Fitted estimator exposing ``score_samples(x)``, which takes
            a column vector of shape ``(eval_points, 1)`` and returns
            the log-density at each point.

    Returns:
        The approximate integral of the density over the interval,
        rounded to 4 decimal places.
    """
    N = eval_points
    step = (end_value - start_value) / (N - 1)  # Spacing between samples

    x = np.linspace(start_value, end_value, N)[:, np.newaxis]
    kd_vals = np.exp(kd.score_samples(x))  # PDF value at every grid point

    # Trapezoidal rule: the two end points carry half weight. A plain
    # sum(kd_vals) * step would count N intervals although only N - 1 exist,
    # over-estimating the probability.
    probability = step * (np.sum(kd_vals) - 0.5 * (kd_vals[0] + kd_vals[-1]))
    return np.round(probability, 4)

# Turn each data set into a single-column 2-D array of shape (n, 1).
data1 = np.array(data1).reshape(-1, 1)
data2 = np.array(data2).reshape(-1, 1)

# Draw the figure and keep the fitted estimators for the integrals below.
kd_data1, kd_data2 = plot_prob_density(
    data1, data2, x_start1=-2.2, x_end1=5.3
)

# Probability mass of the data1 KDE between -20 and the candidate limit.
# NOTE: end_value is adjusted by hand and the script re-run until the
# printed value reaches 95%; the same is done for the 5% limit below.
# Computing these limits directly would be preferable.
beta_95 = get_probability(
    start_value=-20, end_value=5.3, eval_points=1000, kd=kd_data1
)
print('Beta-95%: {}'.format(beta_95))

# Probability mass of the data2 KDE between -20 and the candidate 5% limit.
beta_5 = get_probability(
    start_value=-20, end_value=-2.2, eval_points=1000, kd=kd_data2
)
print('Beta-5%: {}\n'.format(beta_5))

plt.savefig("Ev_test.png")

图示也附在这里。直方图和内核密度图及其95%和5%的概率限制,用红色和黑色垂直粗体线突出显示:

EN

回答 1

Stack Overflow用户

发布于 2022-07-16 12:03:12

以下是解决这一问题的可能办法。此外,所提议的方法在百分位数计算方面存在错误,因此我建议不要使用该方法:

代码语言:python
复制
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import gaussian_kde
import seaborn as sns
from sklearn.neighbors import KernelDensity
%matplotlib inline
import numpy as np
from scipy import stats
import statsmodels.api as sm
import matplotlib.pyplot as plt
from statsmodels.distributions.mixture_rvs import mixture_rvs
from scipy.stats import norm
import numpy as np



# NOTE(review): `data7` and `data8` are not defined in this snippet; they are
# presumably 1-D sample arrays prepared earlier -- confirm.
fig = plt.figure(figsize=(4, 4), dpi=300)
ax = fig.add_subplot(111)

# Plot the normalised histograms (data8 in red, data7 in black).
ax.hist(data8, bins=20, zorder=1, color="r", density=True, alpha=0.6)
ax.hist(data7, bins=20, zorder=1, color="black", density=True, alpha=0.6)

# One univariate KDE per data set: `kde` -> data8 ("Data1"),
# `kde1` -> data7 ("Data2").
kde = sm.nonparametric.KDEUnivariate(data8)
kde1 = sm.nonparametric.KDEUnivariate(data7)

# Plot the KDE for the chosen bandwidth(s).
for bandwidth in [1.8]:
    kde.fit(bw=bandwidth)
    kde1.fit(bw=bandwidth)  # Estimate the densities
    ax.plot(kde.support, kde.density, "-", lw=2, color="r", zorder=10,
            alpha=0.6, label="Data1")
    ax.plot(kde1.support, kde1.density, "-", lw=2, color="black", zorder=10,
            alpha=0.6, label="Data2")

ax.legend(loc="best")
ax.set_xlim([-20, 40])
ax.set_ylim([0, 0.3])
ax.grid(False)

# Probabilities calculation: plot the inverse CDF of Data1 against an evenly
# spaced probability grid on a separate small figure.
quantiles_mesh = np.linspace(0, 1, len(kde.density))
fig = plt.figure(figsize=(2, 2), dpi=300)
plt.plot(quantiles_mesh, kde.icdf)

# 95th percentile of Data1 (red, `kde`) and 5th percentile of Data2 (black,
# `kde1`). Each estimator is matched to the colour of the line drawn for it;
# there is no `kde2` object in this script.
data_1_95 = np.percentile(kde.icdf, 95)
data_2_5 = np.percentile(kde1.icdf, 5)
ax.axvline(x=data_1_95, color='red', linestyle='dashed', linewidth=2)
ax.axvline(x=data_2_5, color='k', linestyle='dashed', linewidth=2)
#plt.savefig("KDE_Plot.png")
票数 0
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/72933871

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档