我正在尝试优化编写一个脚本,它有两个最小化,一个依赖于另一个。我的代码有点臃肿,我发现我正在求解的全局参数很大程度上依赖于对本地参数的初始猜测(这是非常令人关注的)。因此,当代码工作时,我希望对其进行优化,包括速度和健壮性(也就是说,由于本地初始猜测,全局解决方案不应该有太大变化)。
假设你有多个线性方程组:
#system of equations 1
equation1=dF*pF+dO*pO+dC*pC
equation2=dF*pF2+dO*pO2+dC*pC2
equation3=dF*pF3+dO*pO3+dC*pC3
equation4=dF*pF4+dO*pO4+dC*pC4
#system 2
equation5=dF2*pF+dO2*pO+dC2*pC
equation6=dF2*pF2+dO2*pO2+dC2*pC2
equation7=dF2*pF3+dO2*pO3+dC2*pC3
equation8=dF2*pF4+dO2*pO4+dC2*pC4
...对于每个方程组,有4个方程,具有15个可调参数(其中3个是共享的dF、dO和dC)。然而,其他12是在多个不同系统之间共享的。给出了"equation1,equation2,equation3.“的实验值上面的方程组被最小化了。
这12个参数可由4个全局参数(k,k1,k2,k3) io是一个给定的常数.确定。
pF=(sqrt(k)*sqrt(k1)*sqrt(kk1+8io(1+k1)-kk1)/(4(1+k1))
pF2=(sqrt(k*k2)*sqrt(k1*k2)*sqrt(kk2k1k2+8io(1+k1*k2)-kk2k1k2)/(4(1+k1k2))
pF3=(sqrt(k*k3)*sqrt(k1*k3)*sqrt(kk3k1k3+8io(1+k1k3)-kk3k1k3)/(4(1+k1*k3))
pF4=(sqrt(k*k2*k3)*sqrt(k1*k2*k3)*sqrt(kk2k3k1k2k3+8io(1+k1k2k3)-kk2k3k1k2k3)/(4(1+k1*k2*k3))
#all other ps can be calculated in the same manner, but they have different equations I won't list here for space reasons因此,我们可以用4个全局参数(k,k1,k2,k3)来计算p's,然后用给定的p‘s分别遍历每个方程组,求解dF,dO,dC。然后,使用“解决/最小化”本地参数的组合chi2 (dF、dO、dC)将全局参数最小化。
以下是代码:
from scipy.optimize import minimize
import numpy as np
experimental_data_list=[[117.77, 117.705, 117.843, 117.597], [110.575, 110.258, 110.167, 110.216], [125.691, 125.006, 125.327, 124.481], [107.491, 108.461, 107.804, 109.383], [128.689, 128.383, 128.668, 128.29], [125.969, 126.326, 126.28, 126.257], [122.439, 122.684, 122.859, 122.194], [125.989, 125.998, 125.985, 125.897], [120.916, 120.18, 120.345, 120.567], [126.772, 126.669, 127.006, 127.592], [120.176, 120.153, 119.864, 120.205]]
def local_calculation(d,pF,pO,pC,pF2,pO2,pC2,pF3,pO3,pC3,pF4,pO4,pC4,experimental_data):
equation1=(d[0]*pF)+(d[1]*pO)+(d[2]*pC)
equation2=(d[0]*pF2)+(d[1]*pO2)+(d[2]*pC2)
equation3=(d[0]*pF3)+(d[1]*pO3)+(d[2]*pC3)
equation4=(d[0]*pF4)+(d[1]*pO4)+(d[2]*pC4)
return np.sqrt(np.sum((experimental_data-np.array([equation1,equation2,equation3,equation4]))**2))
def get_populations(k,io):
pF=(((np.sqrt(k[0])*np.sqrt(k[1]))*(np.sqrt((8*io*(k[1]+1))+(k[0]*k[1])))-(k[0]*k[1]))/(4*(k[1]+1)))/io
pO=((k[1]*((-np.sqrt(k[0])*np.sqrt(k[1]))*(np.sqrt((8*io*(k[1]+1))+(k[0]*k[1])))+(k[0]*k[1])+(4*io*(k[1]+1))))/(4*((k[1]+1)**2)))/io
pC=(((-np.sqrt(k[0])*np.sqrt(k[1]))*(np.sqrt((8*io*(k[1]+1))+(k[0]*k[1])))+(k[0]*k[1])+(4*io*(k[1]+1)))/(4*((k[1]+1)**2)))/io
pF2=(((np.sqrt((k[0]*k[2]))*np.sqrt((k[1]*k[2])))*(np.sqrt((8*io*((k[1]*k[2])+1))+((k[0]*k[2])*(k[1]*k[2]))))-((k[0]*k[2])*(k[1]*k[2])))/(4*((k[1]*k[2])+1)))/io
pO2=(((k[1]*k[2])*((-np.sqrt((k[0]*k[2]))*np.sqrt((k[1]*k[2])))*(np.sqrt((8*io*((k[1]*k[2])+1))+((k[0]*k[2])*(k[1]*k[2]))))+((k[0]*k[2])*(k[1]*k[2]))+(4*io*((k[1]*k[2])+1))))/(4*(((k[1]*k[2])+1)**2)))/io
pC2=(((-np.sqrt((k[0]*k[2]))*np.sqrt((k[1]*k[2])))*(np.sqrt((8*io*((k[1]*k[2])+1))+((k[0]*k[2])*(k[1]*k[2]))))+((k[0]*k[2])*(k[1]*k[2]))+(4*io*((k[1]*k[2])+1)))/(4*(((k[1]*k[2])+1)**2)))/io
pF3=(((np.sqrt((k[0]*k[3]))*np.sqrt((k[1]*k[3])))*(np.sqrt((8*io*((k[1]*k[3])+1))+((k[0]*k[3])*(k[1]*k[3]))))-((k[0]*k[3])*(k[1]*k[3])))/(4*((k[1]*k[3])+1)))/io
pO3=(((k[1]*k[3])*((-np.sqrt((k[0]*k[3]))*np.sqrt((k[1]*k[3])))*(np.sqrt((8*io*((k[1]*k[3])+1))+((k[0]*k[3])*(k[1]*k[3]))))+((k[0]*k[3])*(k[1]*k[3]))+(4*io*((k[1]*k[3])+1))))/(4*(((k[1]*k[3])+1)**2)))/io
pC3=(((-np.sqrt((k[0]*k[3]))*np.sqrt((k[1]*k[3])))*(np.sqrt((8*io*((k[1]*k[3])+1))+((k[0]*k[3])*(k[1]*k[3]))))+((k[0]*k[3])*(k[1]*k[3]))+(4*io*((k[1]*k[3])+1)))/(4*(((k[1]*k[3])+1)**2)))/io
pF4=(((np.sqrt((k[0]*k[2]*k[3]))*np.sqrt((k[1]*k[2]*k[3])))*(np.sqrt((8*io*((k[1]*k[2]*k[3])+1))+((k[0]*k[2]*k[3])*(k[1]*k[2]*k[3]))))-((k[0]*k[2]*k[3])*(k[1]*k[2]*k[3])))/(4*((k[1]*k[2]*k[3])+1)))/io
pO4=(((k[1]*k[2]*k[3])*((-np.sqrt((k[0]*k[2]*k[3]))*np.sqrt((k[1]*k[2]*k[3])))*(np.sqrt((8*io*((k[1]*k[2]*k[3])+1))+((k[0]*k[2]*k[3])*(k[1]*k[2]*k[3]))))+((k[0]*k[2]*k[3])*(k[1]*k[2]*k[3]))+(4*io*((k[1]*k[2]*k[3])+1))))/(4*(((k[1]*k[2]*k[3])+1)**2)))/io
pC4=(((-np.sqrt((k[0]*k[2]*k[3]))*np.sqrt((k[1]*k[2]*k[3])))*(np.sqrt((8*io*((k[1]*k[2]*k[3])+1))+((k[0]*k[2]*k[3])*(k[1]*k[2]*k[3]))))+((k[0]*k[2]*k[3])*(k[1]*k[2]*k[3]))+(4*io*((k[1]*k[2]*k[3])+1)))/(4*(((k[1]*k[2]*k[3])+1)**2)))/io
local_chi2=[]
for experimental_data in experimental_data_list:
arguments=(pF,pO,pC,pF2,pO2,pC2,pF3,pO3,pC3,pF4,pO4,pC4,experimental_data)
local_solution=minimize(local_calculation,args=arguments, x0=np.array([120,110,100]),method = 'Nelder-Mead')
local_chi2.append(local_solution.fun)
return sum(local_chi2)
io=280000
global_parameter_solution=minimize(get_populations,args=io, x0=np.array([1000,0.002,8,20]),method = 'Nelder-Mead')发布于 2023-03-31 02:00:38
作为一个开始:
不要将experimental_data_list全部写成一行;给每一行自己的行,并将其放入np.array中。
将pF0,1,2,3组合成3x4矩阵的一行,也包括O和C。
移除你多余的父母。有这么多这么多。get_populations中的内部表达式是以一种非常糟糕的方式格式化的,在此之前,我不会建议进一步简化--举个小例子,
((np.sqrt(k[0])*np.sqrt(k[1]))
真的是
np.sqrt(k0*k1)
使用k0,k1从k中解压。
不要使local_chi2成为一个列表;使它成为一个从0开始的浮动,并保持一个正在运行的总计。
不要调用内部minimize。认识到您的local_calculation实际上是d @ pFOC的矩阵乘法。向numpy询问最小二乘矩阵反演,并使用它为您的气返回的残差。
请仔细检查一下。添加数字回归测试,以确保get_populations仍然以相同的方式工作,并在调试器中执行几次。
from scipy.optimize import minimize
import numpy as np
experimental_data_list = np.array((
[117.770, 117.705, 117.843, 117.597],
[110.575, 110.258, 110.167, 110.216],
[125.691, 125.006, 125.327, 124.481],
[107.491, 108.461, 107.804, 109.383],
[128.689, 128.383, 128.668, 128.290],
[125.969, 126.326, 126.280, 126.257],
[122.439, 122.684, 122.859, 122.194],
[125.989, 125.998, 125.985, 125.897],
[120.916, 120.180, 120.345, 120.567],
[126.772, 126.669, 127.006, 127.592],
[120.176, 120.153, 119.864, 120.205],
))
def local_calculation(
d: np.ndarray,
pFOC: np.ndarray,
experimental_data: np.ndarray,
):
equations = d @ pFOC
return np.linalg.norm(experimental_data - equations)
def get_populations(k: np.ndarray, io: float) -> float:
k0, k1, k2, k3 = k
pF = (
(((np.sqrt(k0)*np.sqrt(k1))*(np.sqrt((8*io*(k1+1))+(k0*k1)))-(k0*k1))/(4*(k1+1)))/io,
(((np.sqrt((k0*k2))*np.sqrt((k1*k2)))*(np.sqrt((8*io*((k1*k2)+1))+((k0*k2)*(k1*k2))))-((k0*k2)*(k1*k2)))/(4*((k1*k2)+1)))/io,
(((np.sqrt((k0*k3))*np.sqrt((k1*k3)))*(np.sqrt((8*io*((k1*k3)+1))+((k0*k3)*(k1*k3))))-((k0*k3)*(k1*k3)))/(4*((k1*k3)+1)))/io,
(((np.sqrt((k0*k2*k3))*np.sqrt((k1*k2*k3)))*(np.sqrt((8*io*((k1*k2*k3)+1))+((k0*k2*k3)*(k1*k2*k3))))-((k0*k2*k3)*(k1*k2*k3)))/(4*((k1*k2*k3)+1)))/io,
)
pO = (
((k1*((-np.sqrt(k0)*np.sqrt(k1))*(np.sqrt((8*io*(k1+1))+(k0*k1)))+(k0*k1)+(4*io*(k1+1))))/(4*((k1+1)**2)))/io,
(((k1*k2)*((-np.sqrt((k0*k2))*np.sqrt((k1*k2)))*(np.sqrt((8*io*((k1*k2)+1))+((k0*k2)*(k1*k2))))+((k0*k2)*(k1*k2))+(4*io*((k1*k2)+1))))/(4*(((k1*k2)+1)**2)))/io,
(((k1*k3)*((-np.sqrt((k0*k3))*np.sqrt((k1*k3)))*(np.sqrt((8*io*((k1*k3)+1))+((k0*k3)*(k1*k3))))+((k0*k3)*(k1*k3))+(4*io*((k1*k3)+1))))/(4*(((k1*k3)+1)**2)))/io,
(((k1*k2*k3)*((-np.sqrt((k0*k2*k3))*np.sqrt((k1*k2*k3)))*(np.sqrt((8*io*((k1*k2*k3)+1))+((k0*k2*k3)*(k1*k2*k3))))+((k0*k2*k3)*(k1*k2*k3))+(4*io*((k1*k2*k3)+1))))/(4*(((k1*k2*k3)+1)**2)))/io,
)
pC = (
(((-np.sqrt(k0)*np.sqrt(k1))*(np.sqrt((8*io*(k1+1))+(k0*k1)))+(k0*k1)+(4*io*(k1+1)))/(4*((k1+1)**2)))/io,
(((-np.sqrt((k0*k2))*np.sqrt((k1*k2)))*(np.sqrt((8*io*((k1*k2)+1))+((k0*k2)*(k1*k2))))+((k0*k2)*(k1*k2))+(4*io*((k1*k2)+1)))/(4*(((k1*k2)+1)**2)))/io,
(((-np.sqrt((k0*k3))*np.sqrt((k1*k3)))*(np.sqrt((8*io*((k1*k3)+1))+((k0*k3)*(k1*k3))))+((k0*k3)*(k1*k3))+(4*io*((k1*k3)+1)))/(4*(((k1*k3)+1)**2)))/io,
(((-np.sqrt((k0*k2*k3))*np.sqrt((k1*k2*k3)))*(np.sqrt((8*io*((k1*k2*k3)+1))+((k0*k2*k3)*(k1*k2*k3))))+((k0*k2*k3)*(k1*k2*k3))+(4*io*((k1*k2*k3)+1)))/(4*(((k1*k2*k3)+1)**2)))/io,
)
pFOC = np.array((pF, pO, pC))
local_chi2 = 0
for experimental_data in experimental_data_list:
# d @ pFOC ~ experimental_data; what is d?
d, (residuals,), rank, singular = np.linalg.lstsq(pFOC.T, experimental_data)
local_chi2 += np.sqrt(residuals)
return local_chi2
io = 280_000
global_parameter_solution = minimize(
get_populations,
args=io,
x0=(1000, 0.002, 8, 20),
method='Nelder-Mead',
)还有其他改进的可能,但让我们先从根本上解决问题。
从pFOC计算中提取通用表达式。进一步向向量表示单个矩阵的逆,而不是逐行进行。这会产生
from typing import Iterable
from scipy.optimize import minimize
import numpy as np
experimental_data_list = np.array((
[117.770, 117.705, 117.843, 117.597],
[110.575, 110.258, 110.167, 110.216],
[125.691, 125.006, 125.327, 124.481],
[107.491, 108.461, 107.804, 109.383],
[128.689, 128.383, 128.668, 128.290],
[125.969, 126.326, 126.280, 126.257],
[122.439, 122.684, 122.859, 122.194],
[125.989, 125.998, 125.985, 125.897],
[120.916, 120.180, 120.345, 120.567],
[126.772, 126.669, 127.006, 127.592],
[120.176, 120.153, 119.864, 120.205],
))
def get_pfoc(k: Iterable[float]) -> np.ndarray:
k0, k1, k2, k3 = k
ki = np.array((1, k2, k3, k2*k3))
k01ii = k0*k1*ki*ki
a = 2*(k1*ki + 1)
s = np.sqrt(
k01ii*(4*io*a + k01ii)
) - k01ii
f = s/io/a/2
c = (2*io*a - s)/io/a/a
o = k1*ki*c
return np.vstack((f, o, c))
def get_populations(k: np.ndarray) -> float:
pFOC = get_pfoc(k)
# pFOC.T @ d = edl.T
# 4x3 @ 3x11 = 4x11
d, *_ = np.linalg.lstsq(pFOC.T, experimental_data_list.T, rcond=None)
error = (d.T@pFOC - experimental_data_list).ravel()
return error.dot(error)
io = 280_000
# Regression test for pFOC calculation
assert np.allclose(
get_pfoc((1000, 0.002, 8, 20)),
np.array((
[0.00188615, 0.014887 , 0.03638202, 0.23081748],
[0.00199224, 0.01551359, 0.03706223, 0.18646849],
[0.9961216 , 0.96959941, 0.92655575, 0.58271403],
)),
)
global_parameter_solution = minimize(
fun=get_populations,
x0=(1000, 0.002, 8, 20),
bounds=((0, None),)*4,
)
print(global_parameter_solution) message: CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL
success: True
status: 0
fun: 0.23517135040546988
x: [ 9.999e+02 8.614e-01 4.891e+00 2.441e+00]
nit: 22
jac: [ 6.384e-07 1.033e-06 2.839e-06 -9.689e-06]
nfev: 200
njev: 40
hess_inv: <4x4 LbfgsInvHessProduct with dtype=float64>https://codereview.stackexchange.com/questions/284239
复制相似问题