您好,pyhf用户和开发人员!
我有一个来自之前问题(previous question)的后续问题,所以我将从其中一个回复中提供的 answer.py 代码开始,然后进行一个小的修改。
因此,我使用响应中给出的参数运行fit,但我希望看到fit的结果,所以我添加了一些代码,根据fit结果对原始模板进行加权,并使用覆盖图重新绘制它。完整的脚本如下。
import pyhf
import pyhf.contrib.viz.brazil
import numpy as np
import matplotlib.pylab as plt
# - Get the uncertainties on the best fit signal strength
# - Calculate a 95% CL upper limit on the signal strength
# Suffix appended to every output image filename so different runs
# of this script can be told apart.
tag = "ORIGINAL"
def plot_hist(ax, bins, data, bottom=0, color=None, label=None):
    """Draw *data* as a semi-transparent bar histogram on *ax*.

    Bar positions are the centers of the (assumed uniform-width) bins
    described by the edge array *bins*; *bottom* allows stacking.
    """
    width = bins[1] - bins[0]
    centers = [left + width / 2.0 for left in bins[:-1]]
    ax.bar(centers, data, width, bottom=bottom, alpha=0.5, color=color, label=label)
def plot_data(ax, bins, data, label="Data"):
    """Scatter-plot observed counts at the centers of the bins in *bins*."""
    half_width = (bins[1] - bins[0]) / 2.0
    centers = [edge + half_width for edge in bins[:-1]]
    ax.scatter(centers, data, color="black", label=label)
def invert_interval(test_mus, hypo_tests, test_size=0.05):
    """Invert a scan of hypothesis tests into upper limits on mu.

    For the observed CLs curve and each of the five expected ("Brazil
    band") curves, locate by linear interpolation the mu at which CLs
    crosses *test_size*.  Returns ``{"obs": float, "exp": [5 floats]}``.
    """
    # This manual inversion is taken care of by pyhf itself in v0.5.3.
    mus_ascending = list(reversed(test_mus))

    def crossing(cls_values):
        # np.interp needs ascending x; CLs falls as mu rises, so flip both.
        return np.interp(test_size, list(reversed(cls_values)), mus_ascending)

    cls_obs = np.array([test[0] for test in hypo_tests]).flatten()
    cls_exp = [
        np.array([test[1][idx] for test in hypo_tests]).flatten() for idx in range(5)
    ]
    return {
        "exp": [crossing(band) for band in cls_exp],
        "obs": crossing(cls_obs),
    }
def main():
    """Build toy signal/background templates, fit them with pyhf, and
    scan the signal strength for a 95% CL upper limit.

    Side effects: writes three PNG files (pre-fit components, post-fit
    components, Brazil band), each named with the module-level ``tag``.
    """
    # Fixed seed: every np.random call below draws from this one stream,
    # so the statement order must not change for reproducibility.
    np.random.seed(0)
    # The minuit optimizer is required for return_uncertainties below.
    pyhf.set_backend("numpy", "minuit")
    observable_range = [0.0, 10.0]
    bin_width = 0.5
    _bins = np.arange(observable_range[0], observable_range[1] + bin_width, bin_width)
    n_bkg = 2000
    # Signal normalization chosen at sqrt(n_bkg), i.e. a ~1 sigma excess.
    n_signal = int(np.sqrt(n_bkg))
    # Generate simulation: flat background on [0, 10), Gaussian signal at 5.
    bkg_simulation = 10 * np.random.random(n_bkg)
    signal_simulation = np.random.normal(5, 1.0, n_signal)
    bkg_sample, _ = np.histogram(bkg_simulation, bins=_bins)
    signal_sample, _ = np.histogram(signal_simulation, bins=_bins)
    # Generate observations: 80% of the nominal signal, with the
    # background fluctuated up by ~1 sigma relative to the simulation.
    signal_events = np.random.normal(5, 1.0, int(n_signal * 0.8))
    bkg_events = 10 * np.random.random(int(n_bkg + np.sqrt(n_bkg)))
    observed_events = np.array(signal_events.tolist() + bkg_events.tolist())
    observed_sample, _ = np.histogram(observed_events, bins=_bins)
    # Visualize the simulation and observations (pre-fit picture).
    fig, ax = plt.subplots()
    fig.set_size_inches(7, 5)
    plot_hist(ax, _bins, bkg_sample, label="Background")
    plot_hist(ax, _bins, signal_sample, bottom=bkg_sample, label="Signal")
    plot_data(ax, _bins, observed_sample)
    ax.legend(loc="best")
    ax.set_ylim(top=np.max(observed_sample) * 1.4)
    ax.set_xlabel("Observable")
    ax.set_ylabel("Count")
    fig.savefig("components_{0}.png".format(tag))
    # Build the model: a single channel with uncorrelated per-bin
    # background uncertainties taken as sqrt(N).
    bkg_uncerts = np.sqrt(bkg_sample)
    # NOTE(review): hepdata_like is deprecated in newer pyhf releases
    # (renamed simplemodels.uncorrelated_background) -- confirm against
    # the installed pyhf version.
    model = pyhf.simplemodels.hepdata_like(
        signal_data=signal_sample.tolist(),
        bkg_data=bkg_sample.tolist(),
        bkg_uncerts=bkg_uncerts.tolist(),
    )
    # Main-channel observations followed by the auxiliary data that
    # anchors the constraint terms.
    data = pyhf.tensorlib.astensor(observed_sample.tolist() + model.config.auxdata)
    # Perform inference: MLE fit; rows of fit_result are (value, uncertainty).
    fit_result = pyhf.infer.mle.fit(data, model, return_uncertainties=True)
    bestfit_pars, par_uncerts = fit_result.T
    print(
        f"best fit parameters:\
        \n * signal strength: {bestfit_pars[0]} +/- {par_uncerts[0]}\
        \n * nuisance parameters: {bestfit_pars[1:]}\
        \n * nuisance parameter uncertainties: {par_uncerts[1:]}"
    )
    # Visualize the results: reweight the input templates by the best-fit
    # parameters (the POI scales signal; per-bin nuisances scale background).
    fit_bkg_sample = []
    for w,b in zip(bestfit_pars[1:],bkg_sample):
        fit_bkg_sample.append(w*b)
    fit_signal_sample = bestfit_pars[0]*np.array(signal_sample)
    fig, ax = plt.subplots()
    fig.set_size_inches(7, 5)
    plot_hist(ax, _bins, fit_bkg_sample, label="Background")
    plot_hist(ax, _bins, fit_signal_sample, bottom=fit_bkg_sample, label="Signal")
    plot_data(ax, _bins, observed_sample)
    ax.legend(loc="best")
    ax.set_ylim(top=np.max(observed_sample) * 1.4)
    ax.set_xlabel("Observable")
    ax.set_ylabel("Count")
    fig.savefig("components_after_fit_{0}.png".format(tag))
    # Perform a hypothesis-test scan over the signal strength mu.
    _start = 0.0
    _stop = 5
    _step = 0.1
    poi_tests = np.arange(_start, _stop + _step, _step)
    print("\nPerforming hypothesis tests\n")
    # NOTE(review): the qtilde keyword is deprecated in newer pyhf
    # (use test_stat="qtilde") -- confirm against the installed version.
    hypo_tests = [
        pyhf.infer.hypotest(
            mu_test,
            data,
            model,
            return_expected_set=True,
            return_test_statistics=True,
            qtilde=True,
        )
        for mu_test in poi_tests
    ]
    # Upper limits on signal strength: invert the CLs scan at alpha = 0.05.
    results = invert_interval(poi_tests, hypo_tests)
    print(f"Observed Limit on µ: {results['obs']:.2f}")
    print("-----")
    for idx, n_sigma in enumerate(np.arange(-2, 3)):
        print(
            "Expected {}Limit on µ: {:.3f}".format(
                " " if n_sigma == 0 else "({} σ) ".format(n_sigma),
                results["exp"][idx],
            )
        )
    # Visualize the "Brazil band" of expected limits around the observed.
    fig, ax = plt.subplots()
    fig.set_size_inches(7, 5)
    ax.set_title("Hypothesis Tests")
    ax.set_ylabel(r"$\mathrm{CL}_{s}$")
    ax.set_xlabel(r"$\mu$")
    pyhf.contrib.viz.brazil.plot_results(ax, poi_tests, hypo_tests)
    fig.savefig("brazil_band_{0}.png".format(tag))
if __name__ == "__main__":
    main()

当我运行它时,我得到了以下图。第一个是原始观测/模拟,第二个图根据拟合结果对模拟进行缩放。


所以这一切看起来都很好,我想我知道发生了什么。
但现在我要问一个问题,在您的示例中稍有不同就可以更好地说明这个问题。
我将修改生成的模拟和观察结果,以便在模拟和样本中有不同数量的背景事件。我还让信号变得更有意义。这将是一个例子,在进行拟合之前,我无法很好地估计背景贡献。在您提供的示例中,模拟样本和数据的背景事件数量是相同的,但在现实世界中不是这样。
因此,我转到上面的代码,并更改了这些行。
n_bkg = 2000
n_signal = 200
# Generate simulation
bkg_simulation = 10 * np.random.random(n_bkg)
signal_simulation = np.random.normal(5, 1.0, n_signal)
bkg_sample, _ = np.histogram(bkg_simulation, bins=_bins)
signal_sample, _ = np.histogram(signal_simulation, bins=_bins)
# Generate observations
signal_events = np.random.normal(5, 1.0, int(n_signal * 0.8))
bkg_events = 10 * np.random.random(n_bkg - 300)

拟合结果不是很好,我也不指望它会很好,因为我锁定了背景事件的数量,抹平了每个 bin 中的泊松涨落。此处显示了相关的曲线图(拟合前/拟合后)。


我认为另一种方法可能是添加一个浮动的冗余参数(nuisance parameter)来表示背景强度,同时仍然允许单个 bin 在泊松涨落范围内变化。就此而言,信号 bin 难道不能(不应该?)也发生涨落吗?
在这种情况下,我会从我的模拟样本中大量的数据点开始,以获得更“真实”(我知道这不是严格的)分布。一旦拟合将信号/背景事件的数量降低,泊松波动将变得更加显著。
我敢肯定,似然函数的优化/最小化会变得更加困难,但如果我们锁定大量的背景归一化,也会感觉到我们过早地限制了拟合。或许我漏掉了什么?
一如既往的感谢您的帮助和回复!
发布于 2020-10-30 20:28:25
您应该能够通过向背景组件添加 "normfactor" 修饰符来添加一个新的冗余参数(nuisance parameter)。
例如:
spec = {
    'channels': [
        {
            'name': 'singlechannel',
            'samples': [
                {
                    'name': 'signal',
                    'data': [10.0],
                    'modifiers': [{'name': 'mu', 'type': 'normfactor', 'data': None}],
                },
                {
                    'name': 'background',
                    'data': [50.0],
                    'modifiers': [{'name': 'bkgnorm', 'type': 'normfactor', 'data': None}],
                },
            ],
        }
    ]
}

注意信号和背景之间的对称性。
https://stackoverflow.com/questions/64538170
复制相似问题