我正在尝试使用Symfit对分段模型执行交叉验证:
我的数据:
x_data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], dtype=float)
y_data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10, 10, 10])要使用交叉验证,我们需要创建一个估计器,因此我尝试将拟合打包到一个类中
from sklearn.model_selection import cross_validate, cross_val_score
class model:
def __init__( self, a=None, b=None, c=None ):
self.a = a
self.b = b
self.c = c
def _model_background(self, X, a, b, c):
y1 = a*x + b-a*c
y2 = b
return Model({y: Piecewise((y1, x <= c), (y2, x > c))})
def predict( self, X ):
return self._model_background( X, self.a, self.b, self.c )
def fit( self, X, y ):
from symfit import parameters, variables, Fit, Piecewise, Model
fit = Fit(self._model_background, x=x_data, y=y_data)
fit_result = fit.execute()
self.a = fit_result.value(a)
self.b = fit_result.value(b)
self.c = fit_result.value(c)
return self
def get_params( self, deep=False ):
return { 'a':self.a, 'b':self.b, 'c':self.c }
def set_params( self, **parameters ):
for parameter, value in parameters.intems():
setattr( self, parameter, value )
return self然后我执行交叉验证行
cross_validate( symfitmodel(), x_data, y_data, cv=5, scoring='neg_mean_squared_error' )但似乎我没有创建这个类,因为它应该是错误消息:
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py:548: FitFailedWarning: Estimator fit failed. The score on this train-test partition for these parameters will be set to nan. Details:
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\support.py", line 282, in __get__
return getattr(obj, self.cache_attr)
AttributeError: 'Model' object has no attribute '_cached_connectivity_mapping'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "<ipython-input-3-db68eb82746d>", line 18, in fit
fit = Fit(self._model_background, x=x_data, y=y_data)
File "C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\support.py", line 423, in wrapped_func
return func(*bound_args.args, **bound_args.kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\fit.py", line 374, in __init__
self.model = Model(model)
File "C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\models.py", line 875, in __init__
super(HessianModel, self).__init__(*args, **kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\models.py", line 824, in __init__
super(GradientModel, self).__init__(*args, **kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\models.py", line 125, in __init__
self._init_from_dict(model)
File "C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\models.py", line 651, in _init_from_dict
super(BaseCallableModel, self)._init_from_dict(model_dict)
File "C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\models.py", line 309, in _init_from_dict
ordered = list(toposort(self.connectivity_mapping))
File "C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\support.py", line 285, in __get__
setattr(obj, self.cache_attr, self.fget(obj))
File "C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\models.py", line 383, in connectivity_mapping
vars, params = seperate_symbols(expr)
File "C:\ProgramData\Anaconda3\lib\site-packages\symfit\core\support.py", line 82, in seperate_symbols
for symbol in func.free_symbols:
AttributeError: 'function' object has no attribute 'free_symbols'
warnings.warn("Estimator fit failed. The score on this train-test"我在curve_fit上试过了,但没有成功:
class piecewise:
def __init__( self, x0=None, a=None, b=None ):
self.x0 = x0
self.a = a
self.b = b
def _piecewise_background(self, X, x0, a, b):
return np.piecewise(X, [X < x0], [lambda X:a*X + b-a*x0, lambda X:b])
def predict( self, X ):
return self._piecewise_background( X, self.x0, self.a, self.b )
def fit( self, X, y ):
from scipy.optimize import curve_fit
popt, pcov = curve_fit( self._piecewise_background, X, y )
self.x0 = popt[0]
self.a = popt[1]
self.b = popt[2]
return self
def get_params( self, deep=False ):
return { 'x0':self.x0, 'a':self.a, 'b':self.b }
def set_params( self, **parameters ):
for parameter, value in parameters.intems():
setattr( self, parameter, value )
return self有什么想法吗?
发布于 2021-04-27 19:26:52
这不是一个答案,而是一个评论。
文中给出了一种直接方法(不迭代,没有猜测初值):https://fr.scribd.com/document/380941024/Regression-par-morceaux-Piecewise-Regression-pdf (本例中为第8页)。
当然,问题中给出的数据很少引起人们的兴趣,因为没有散布。所以不需要数值微积分,结果是显而易见的。但这可以用来检验分段回归的方法。

具有散乱数据的示例
为了使其更具代表性,上述数据已被分散。示例如下:

https://stackoverflow.com/questions/67273425
复制相似问题