我的程序使用 CUDA 基数排序(radix sort)类。从 CUDA 4.0 升级到 4.2 后,该类的一个辅助初始化函数崩溃,并显示消息 "Stack around the variable 'devprop' is corrupted"(变量 'devprop' 周围的堆栈已损坏)。我已经隔离了这个问题:注释掉函数中的部分代码后,发现是 cudaGetDeviceProperties 破坏了 devprop。我不知道为什么会发生这种情况,也不知道如何解决。我的环境是 CUDA 4.2、开发驱动程序 301.32、Nsight 2.2、Windows 7 64 位,编译目标为 Win32。以下代码片段包含发生崩溃的辅助函数:
namespace nvRadixSort
{
#include "radixsort.h"
#include "cudpp/cudpp.h"
#include <stdio.h>
#include <assert.h>
// Device-dependent strategy flags; both are assigned by initDeviceParameters().
// True when the current device's compute capability is below 1.2
// (major * 10 + minor < 12).
bool bManualCoalesce = false;
// True when the current device's compute capability is below 2.0
// (major * 10 + minor < 20).
bool bUsePersistentCTAs = false;
/**
 * Selects kernel strategy flags for the currently active CUDA device.
 *
 * Reads the current device's compute capability and sets the globals
 * bManualCoalesce (capability below 1.2) and bUsePersistentCTAs
 * (capability below 2.0). If either runtime query fails, the failure is
 * reported on stderr and both flags keep their previous values instead of
 * being derived from an unfilled cudaDeviceProp.
 *
 * @param keysOnly  Not read by the code visible in this excerpt.
 */
void initDeviceParameters(bool keysOnly)
{
    int deviceID = -1;
    cudaError_t status = cudaGetDevice(&deviceID);
    if (status != cudaSuccess)
    {
        fprintf(stderr, "initDeviceParameters: cudaGetDevice failed: %s\n",
                cudaGetErrorString(status));
        return;
    }

    // NOTE(review): a "stack around 'devprop' is corrupted" report at this
    // call was traced by the author to building with CUDA 4.0 build rules
    // against the 4.2 toolkit; keep headers/build rules and runtime in sync.
    cudaDeviceProp devprop;
    status = cudaGetDeviceProperties(&devprop, deviceID);
    if (status != cudaSuccess)
    {
        fprintf(stderr, "initDeviceParameters: cudaGetDeviceProperties failed: %s\n",
                cudaGetErrorString(status));
        return;
    }

    // Encode the compute capability as a two-digit number, e.g. 2.1 -> 21.
    const int smVersion = devprop.major * 10 + devprop.minor;

    // sm_12 and later devices don't need help with coalesce in reorderData kernel
    bManualCoalesce = (smVersion < 12);
    bUsePersistentCTAs = (smVersion < 20);

    if (bUsePersistentCTAs)
    {
        // Persistent-CTA setup is elided in this excerpt; it is not reached
        // on the author's sm_21 device.
    }
}
}
下面是相关的类代码:
#include <stdio.h>
#include <cuda_runtime_api.h>
#include "cudpp/cudpp.h"
namespace nvRadixSort
{
class RadixSort
{
public:
/**
 * Constructs a sorter and allocates its temporary storage.
 *
 * Every handle and pointer member starts at 0, including mCudppContext,
 * so none of them holds an indeterminate value if a later setup step in
 * initialize() fails.
 *
 * @param maxElements  Element count forwarded to initialize() for sizing
 *                     the temporary buffers.
 * @param keysOnly     Forwarded to initialize(); when true no value
 *                     buffer is allocated there.
 */
RadixSort(unsigned int maxElements, bool keysOnly = false)
    : mCudppContext(0),
      mScanPlan(0),
      mNumElements(0),
      mTempKeys(0),
      mTempValues(0),
      mCounters(0),
      mCountersSum(0),
      mBlockOffsets(0)
{
    // Allocate temporary storage
    initialize(maxElements, keysOnly);
}
protected: // data
CUDPPHandle mCudppContext; // CUDPP context handle, filled in by cudppCreate() in initialize()
CUDPPHandle mScanPlan; // CUDPP plan handle for prefix sum
unsigned int mNumElements; // Number of elements of temp storage allocated
unsigned int *mTempKeys; // Intermediate storage for keys
unsigned int *mTempValues; // Intermediate storage for values
unsigned int *mCounters; // Counter for each radix
unsigned int *mCountersSum; // Prefix sum of radix counters
unsigned int *mBlockOffsets; // Global offsets of each radix in each block
protected: // methods
/**
 * Sets up device-dependent flags, the CUDPP scan plan and the temporary
 * device buffers for sorting up to numElements items.
 *
 * The results of cudppCreate() and cudppPlan() are checked; on failure a
 * message is written to stderr and the affected handle(s) are set to 0 so
 * no indeterminate handle is kept. Buffer allocation proceeds regardless,
 * and checkCudaError() is called at the end as before.
 *
 * @param numElements  Number of elements the temporary buffers must hold.
 * @param keysOnly     When true, mTempValues is not allocated here.
 */
void initialize(unsigned int numElements, bool keysOnly)
{
    // initialize parameters based on present CUDA device
    initDeviceParameters(keysOnly);

    mNumElements = numElements;

    // Ceiling divisions: quotient plus one when there is a remainder.
    unsigned int numBlocks = numElements / (CTA_SIZE * 4) +
        (((numElements % (CTA_SIZE * 4)) != 0) ? 1 : 0);
    unsigned int numBlocks2 = numElements / (CTA_SIZE * 2) +
        (((numElements % (CTA_SIZE * 2)) != 0) ? 1 : 0);

    // Describe the scan: exclusive forward prefix sum over unsigned ints.
    CUDPPConfiguration scanConfig;
    scanConfig.algorithm = CUDPP_SCAN;
    scanConfig.datatype = CUDPP_UINT;
    scanConfig.op = CUDPP_ADD;
    scanConfig.options = CUDPP_OPTION_EXCLUSIVE | CUDPP_OPTION_FORWARD;

    // Create the CUDPP context first; only build the plan if that worked,
    // so cudppPlan() is never handed an invalid context handle.
    if (cudppCreate(&mCudppContext) != CUDPP_SUCCESS)
    {
        fprintf(stderr, "RadixSort::initialize(): cudppCreate failed\n");
        mCudppContext = 0;
        mScanPlan = 0;
    }
    else if (cudppPlan(mCudppContext, &mScanPlan, scanConfig, 16 * numBlocks2, 1, 0) != CUDPP_SUCCESS)
    {
        fprintf(stderr, "RadixSort::initialize(): cudppPlan failed\n");
        mScanPlan = 0;
    }

    // Allocate temporary storage
    const size_t elementBytes = numElements * sizeof(unsigned int);
    const size_t counterBytes = WARP_SIZE_ * numBlocks * sizeof(unsigned int);

    cudaMalloc((void **)&mTempKeys, elementBytes);
    if (!keysOnly)
    {
        cudaMalloc((void **)&mTempValues, elementBytes);
    }
    cudaMalloc((void **)&mCounters, counterBytes);
    cudaMalloc((void **)&mCountersSum, counterBytes);
    cudaMalloc((void **)&mBlockOffsets, counterBytes);

    // checkCudaError is defined outside this excerpt; presumably it reports
    // any pending CUDA runtime error from the allocations above - confirm.
    checkCudaError("RadixSort::initialize()");
}
}
发布于 2012-07-27 02:05:24
问题已解决。尽管已经安装了 CUDA 4.2,我的 Visual Studio 项目仍在使用 CUDA 4.0 的构建规则和工具。把项目文件改为使用 CUDA 4.2 的构建规则文件之后,问题就消失了。
https://stackoverflow.com/questions/11654502
复制相似问题