我的应用程序使用OpenCL。我在我的机器上进行了测试,它运行得很好,但在用户的机器上,内核就不能工作了。
我的机器
OpenCL Status : Using OpenCL Platform : Intel(R) OpenCL HD Graphics
OpenCL Status : Using GPU Device : Intel(R) HD Graphics 510
用户机器
OpenCL Status : Using OpenCL Platform : NVIDIA CUDA
OpenCL Status : Using GPU Device : GeForce GT 730
代码(C++)
kernels = new ComputeKernel();
std::string source = ReadShaderSourceFile(GetExecutableDir() + "\\Data\\kernels\\generators\\generators.cl", &tmp);
kernels->AddSoruce(source);
kernels->BuildProgram("-I" + appState->globals.kernelsIncludeDir + " -cl-fast-relaxed-math -cl-mad-enable");
kernels->AddKernel("clear_mesh_terrain");
kernels->CreateBuffer("mesh", CL_MEM_READ_WRITE, appState->models.customBase->mesh->vertexCount * sizeof(Vert));
kernels->WriteBuffer("mesh", true, appState->models.customBase->mesh->vertexCount * sizeof(Vert), appState->models.customBase->mesh->vert);
kernels->SetKernelArg("clear_mesh_terrain", 0, "mesh");
kernels->ExecuteKernel("clear_mesh_terrain", cl::NDRange(1),
cl::NDRange(appState->models.coreTerrain->mesh->vertexCount));ComputeKernel类:
// Appends one program source (text pointer + length) to `sources`, which
// BuildProgram() later hands to cl::Program.
//
// NOTE(review): `source` is a by-value parameter that is destroyed when this
// function returns. If the element type of `sources` keeps the raw
// `const char*` instead of copying the text, the stored pointer dangles --
// confirm against the cl::Program::Sources typedef in use.
void ComputeKernel::AddSoruce(std::string source)
{
    const char* text = source.c_str();
    const auto length = source.size();
    sources.push_back({text, length});
}
// Creates `program` from every source added so far and builds it for `device`
// with the given compiler options. On failure the build log is forwarded to
// onStatus(); nothing is reported on success.
void ComputeKernel::BuildProgram(std::string options)
{
    program = cl::Program(context, sources);

    const auto buildStatus = program.build({ device }, options.c_str());
    if (buildStatus == CL_SUCCESS)
    {
        return;
    }

    onStatus("Error Building : " + program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device));
}
void ComputeKernel::AddKernel(std::string name)
{
kernels[name] = cl::Kernel(program, name.c_str());
}
// Drops all registered kernels and all queued program sources.
// `buffers` and `program` are left untouched by this function.
void ComputeKernel::Clear()
{
    kernels.clear();
    sources.clear();
}
void ComputeKernel::ExecuteKernel(std::string name, cl::NDRange local, cl::NDRange global)
{
queue.enqueueNDRangeKernel(kernels[name], cl::NullRange, global, local);
queue.finish();
}
void ComputeKernel::CreateBuffer(std::string name, int type, size_t size)
{
OpenCLBuffer buffer;
buffer.size = size;
buffer.buffer = cl::Buffer(context, type, size);
buffers[name] = buffer;
}
void ComputeKernel::SetKernelArg(std::string name, int arg, std::string buffer)
{
kernels[name].setArg(arg, buffers[buffer].buffer);
}
void ComputeKernel::ReadBuffer(std::string buffer, bool blocking, size_t size, void* data)
{
queue.enqueueReadBuffer(buffers[buffer].buffer, blocking ? CL_TRUE : CL_FALSE, 0, size, data);
}
void ComputeKernel::WriteBuffer(std::string buffer, bool blocking, size_t size, void* data)
{
queue.enqueueWriteBuffer(buffers[buffer].buffer, blocking ? CL_TRUE : CL_FALSE, 0, size, data);
}内核:
// Resets one vertex per work-item: zeroes all four normal components and the
// y component of the position. The vertex is selected by the work-item's
// global id in dimension 0; no bounds check is performed against the buffer.
__kernel void clear_mesh_terrain(__global Vert* mesh)
{
    const int vertexIndex = get_global_id(0);
    __global Vert* vertex = mesh + vertexIndex;

    vertex->normal.x = 0.0f;
    vertex->normal.y = 0.0f;
    vertex->normal.z = 0.0f;
    vertex->normal.w = 0.0f;

    vertex->position.y = 0.0f;
}
现在说不工作,我的意思是什么都没有,(内核根本没有被执行),但是没有错误,没有内核的编译失败。
发布于 2022-02-09 13:27:15
最有可能的解释是,您的 Nvidia GT 730 显存不足。某些缓冲区无法分配,于是内核的执行时间为 0,实际上什么也没有做。
请统计一下您总共分配了多少显存。改用较小的缓冲区时,程序可能仍然可以正常工作。
https://stackoverflow.com/questions/71022546
复制相似问题