我编写了一个简单的Runge四阶积分程序来求解一个常微分方程组,并使用OpenMP并行化。我不知道这是否是我们所能做的最好的,以最大限度地提高代码的性能,只需付出很少的努力。
我需要返回所有的值,所以我在所有步骤中都保留了值。我还在每个时间步骤中创建线程,并在位置上并行,也就是说,随着时间的推移,pragma opm parallel在循环中。
以下是我的尝试: (链接到gitlab存储库)
//RHS for a system of equations
void xprsys(const int n,const vector<double>& x, vector<double>& f)
{
/*
* n : number of equations
* x : value in each time step
* f : RHS of equtions dx/dt = f
*/
double sum1=0;
#pragma omp parallel for reduction(+:sum1)
for (int i=0; i<n; i++){
sum1 = 0;
for(int j=0; j<n; j++)
sum1 += sin(x[j]-x[i]);
f[i] = M_PI + 2.0 * sum1;
}
}
void SolveRK4(const int n, double h,vector<double> x,
vector<vector<double>>& x_vec,
int nstep, vector<double>& times)
{
/*
* times : vector contains the time 0 : t_final step dt
* x_vec : [nstep by N] 2 Dimensional vector
* dim1 : defined as typedef vector<double> dim1
*/
times[0] = 0.0;
dim1 y(n);
dim1 f1(n),f2(n),f3(n),f4(n);
double half_h = 0.5 * h;
double h_sixth = h/6.0;
// x_vec[nstepxN]
for (int i=0; i<n; i++)
x_vec[0][i] = x[i];
for (int k=1; k<nstep; k++){
times[k] = k*h;
xprsys(n,x,f1);
#pragma omp parallel for
for(int i=0; i<n; i++)
y[i] = x[i] + half_h * f1[i];
#pragma omp master
xprsys(n,y,f2);
#pragma omp barrier
#pragma omp for
for(int i=0; i<n; i++)
y[i] = x[i] + half_h * f2[i];
#pragma omp master
xprsys(n,y,f3);
#pragma omp barrier
#pragma omp for
for(int i=0; i<n; i++)
y[i] = x[i] + h * f3[i];
#pragma omp master
xprsys(n,y,f4);
#pragma omp barrier
#pragma omp for
for(int j=0; j<n; j++) {
x[j] = x[j] + h_sixth * (f1[j] + f4[j] + 2.0 * (f2[j] + f3[j]));
x_vec[k][j] = x[j];
}
}
}发布于 2017-07-23 13:27:36
为了遵循@miscco建议,我编写了这个版本,它使用valarray并具有良好的性能。我只用了一个omp loop --我在这里做的(链接到gitlab存储库)
void euler_integrator (StateVec &y, DerivFunc dydt, double dt) {
y += dydt (y) * dt;
}
/*------------------------------------------------------------*/
void runge_kutta4_integrator (StateVec &y, DerivFunc dydt, double dt) {
auto k1 = dydt (y);
auto k2 = dydt (y + dt*k1/2.);
auto k3 = dydt (y + dt*k2/2.);
auto k4 = dydt (y + dt*k3);
y += (k1 + 2.*k2 + 2.*k3 + k4) * dt/6;
}
/*------------------------------------------------------------*/
void integrate (Integrator integrator, DerivFunc dydt, OutputFunc output_func,
std::ofstream &output, StateVec y, int num_steps, double dt) {
for (auto step = 0; step < num_steps; ++step) {
output_func (step*dt, y, output);
integrator (y, dydt, dt);
}
}
/*------------------------------------------------------------*/
StateVec xprsys (const StateVec &x)
{
double sumx=0;
int N = PARN;
StateVec f;
f = StateVec(N);
#pragma omp parallel for reduction(+:sumx)
for (int i=0; i<N; i++){
sumx = 0;
for(int j=0; j<N; j++)
sumx += sin(x[j]-x[i]);
f[i] = PARw + PARka_over_N * sumx;
}
return f;
}https://codereview.stackexchange.com/questions/167460
复制相似问题