我正尝试把一个 CUDA 内核拆分成四个独立的内核来对它进行优化。在代码开头,我已经为所有这些内核声明了函数原型:
// Forward declarations for the flux kernels and their device-side driver.
// The names must match the definitions and the launches in calcFlux exactly
// (a single underscore after "flux"); a prototype spelled differently declares
// an unrelated function and leaves the launched kernel undeclared.
// Every kernel takes (fluxes, conc) in that order.
__global__ void knowles_flux_oligomers(double*, double*);
__global__ void knowles_flux_nucleus(double*, double*);
__global__ void knowles_flux_fibrils(double*, double*);
__global__ void knowles_flux_maxlength(double*, double*);
// Driver: takes (concs, fluxes, dt).
__device__ void calcFlux(double*, double*, double*);
... Code ...
// Device-side driver that fills `fluxes` from `concs` by launching the four
// flux kernels from inside a __device__ function (device-side kernel launches).
//
// Parameters:
//   concs  - input array; forwarded to every kernel as its SECOND argument.
//   fluxes - output array; forwarded to every kernel as its FIRST argument.
//   dt     - not used anywhere in this function body.
//
// numBlocks, numThreads and nc are not defined in this block.
// NOTE(review): launching kernels from device code presumably requires
// relocatable device code and the device runtime library -- confirm the build
// flags.
// NOTE(review): device-side cudaDeviceSynchronize() is reported as deprecated
// or removed in recent CUDA toolkits -- confirm against the toolkit in use.
// NOTE(review): none of the launches or synchronisations is error-checked.
__device__ void calcFlux(double* concs, double* fluxes, double* dt)
{
// General pass: one thread per element over numBlocks * numThreads elements.
knowles_flux_fibrils<<< numBlocks, numThreads >>>(fluxes, concs);
// Wait for the general pass before launching the kernels below, which write
// fluxes entries that the first kernel may also have written.
cudaDeviceSynchronize();
// One block of nc-1 threads; the kernel zeroes fluxes[1] .. fluxes[nc-1].
// NOTE(review): a thread count of nc-1 is only a valid launch if nc > 1.
knowles_flux_oligomers<<< 1, nc-1 >>>(fluxes, concs);
// Single thread; the kernel writes fluxes[nc-1].
knowles_flux_nucleus<<< 1, 1 >>>(fluxes, concs);
// Single thread; the kernel writes fluxes[maxlength-1].
knowles_flux_maxlength<<< 1, 1 >>>(fluxes, concs);
// Wait for the three special-case kernels before returning to the caller.
cudaDeviceSynchronize();
}
// Sets one flux entry per thread to zero.
//
// Parameters:
//   fluxes - output array; this thread writes fluxes[global thread id + 1].
//   conc   - accepted for a uniform kernel signature; not read here.
//
// The "+ 1" offset means thread 0 handles element 1, so element 0 is never
// touched by this kernel. There is no bounds check: the launch configuration
// alone decides which entries are written.
__global__ void knowles_flux_oligomers(double *fluxes, double *conc)
{
    // Flat 1-D global thread id, shifted by one element.
    const int target = threadIdx.x + blockDim.x * blockIdx.x + 1;

    fluxes[target] = 0;
}
// Computes the flux of the element at index (global thread id + nc - 1) and
// stores it in fluxes at that same index.
//
// Parameters:
//   fluxes - output array; only fluxes[n] is written.
//   conc   - input array; reads conc[0], conc[n] and every conc[s] with
//            n < s < maxlength.
//
// nc, maxlength, kn, km and ka are defined outside this block.
// There is no bounds check on n.
__global__ void knowles_flux_nucleus(double *fluxes, double *conc)
{
    // Element handled by this thread; thread 0 maps to index nc - 1.
    const int n = blockIdx.x * blockDim.x + threadIdx.x + nc - 1;

    // Accumulate every entry with a larger index than n.
    double tail_sum = 0;
    int s = n + 1;
    while (s < (maxlength))
    {
        tail_sum += conc[s];
        ++s;
    }

    // Three contributions, kept in the original left-to-right order:
    //   + kn * conc[0]^nc
    //   + 2 * km * (sum of larger-index entries)
    //   - 2 * ka * conc[n] * conc[0]
    fluxes[n] = (kn)*pow(conc[0],(nc))
              + 2*(km)*tail_sum
              - 2*(ka)*conc[n]*conc[0];
}
// Computes the flux of one array element per thread (element index == global
// thread index) and stores it in fluxes[idx].
//
// Parameters:
//   fluxes - output array; this thread writes fluxes[idx] only.
//   conc   - input array; reads conc[0], conc[idx], conc[idx-1] (when idx > 0)
//            and every conc[s] with idx < s < maxlength.
//
// Launch layout: 1-D grid, one thread per element. Threads whose index is
// >= maxlength return immediately, so the grid may be rounded up past the
// array length.
// NOTE(review): assumes both arrays hold at least maxlength elements -- the
// summation loop bound suggests so; confirm against the allocation.
// km, ka and maxlength are defined outside this block.
__global__ void knowles_flux_fibrils(double *fluxes, double *conc)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Surplus threads of a rounded-up grid must not touch memory.
    if (idx >= (maxlength))
        return;

    // Sum of every entry with a larger index than this one.
    double frag_term = 0;
    for (int s = idx + 1; s < (maxlength); s++)
    {
        frag_term += conc[s];
    }

    // Element 0 has no predecessor: reading conc[idx-1] there would be an
    // out-of-bounds access at conc[-1], so its incoming term is taken as zero.
    // NOTE(review): fluxes[0] is presumably recomputed elsewhere -- confirm.
    const double prev = (idx > 0) ? conc[idx - 1] : 0.0;

    fluxes[idx] = -(km)*(idx)*conc[idx] + 2*(km)*frag_term - 2*(ka)*conc[idx]*conc[0] + 2*(ka)*prev*conc[0];
}
// Computes the flux of the element at index (global thread id + maxlength - 1)
// and stores it in fluxes at that same index.
//
// Parameters:
//   fluxes - output array; only fluxes[idx] is written.
//   conc   - input array; reads conc[0], conc[idx-1] and conc[idx].
//
// maxlength, km and ka are defined outside this block.
// There is no bounds check on idx.
__global__ void knowles_flux_maxlength(double *fluxes, double *conc)
{
// Thread 0 maps to index maxlength - 1.
int idx = blockIdx.x * blockDim.x + threadIdx.x + maxlength - 1;
// Two contributions: -km * idx * conc[idx] and +2 * ka * conc[idx-1] * conc[0].
// Unlike the other flux kernels there is no summation over larger indices.
fluxes[idx] = -km*(idx)*conc[idx]+2*(ka)*conc[idx-1]*conc[0];
}
编译这段代码时出现如下错误:“fatbinary:致命错误:'Multiple….sm_35.cubin' 不是 'keyword=value' 格式”,其中 'Multiple…' 部分是我要编译的源文件文件名的末尾部分。
也许是我的搜索功力(google-fu)不够,但关于这种错误我什么资料都没有搜到。
发布于 2014-02-16 07:06:06
好的,这个问题与代码本身无关。这个错误实际上与我要编译的源文件的文件名有关。文件名最初是“GPU RKF45 (可变步长,多通量内核).cu”(实际文件名为英文,错误信息中的 'Multiple…' 即对应其中“多通量内核”这一段)。看起来是文件名中的逗号导致了问题:去掉逗号之后就可以正常编译了。
https://stackoverflow.com/questions/21808285
复制相似问题