我对如何使用 cub::DeviceReduce::ArgMin() 有些困惑。下面是我从 CUB 的文档中复制的代码。
// Usage fragment for cub::DeviceReduce::ArgMin, which follows a two-call
// pattern: the first call sizes the scratch buffer, the second performs the
// reduction.
// NOTE(review): this is an illustrative fragment, not a complete program --
// the pointers below are declared but never allocated here, and the calls
// appear at file scope. d_in and d_out are expected to refer to GPU memory
// before either ArgMin call is made.
#include <cub/cub.cuh>
// Declare, allocate, and initialize device-accessible pointers for input and output
int num_items; // e.g., 7
int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9], located in GPU
// NOTE(review): KeyValuePair is written unqualified here; presumably a
// using-declaration for namespace cub is assumed -- confirm (the type is
// spelled cub::KeyValuePair when fully qualified).
KeyValuePair<int, int> *d_out; // e.g., [{-,-}]
// Determine temporary device storage requirements
// Passing a NULL d_temp_storage makes this first call a size query: the
// required scratch size is reported through temp_storage_bytes.
void *d_temp_storage = NULL;
size_t temp_storage_bytes = 0;
cub::DeviceReduce::ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);
// Allocate temporary storage
cudaMalloc(&d_temp_storage, temp_storage_bytes);
// Run argmin-reduction
// Same arguments as the size query, but d_temp_storage now points to the
// scratch buffer allocated above, so this call writes the result to d_out.
cub::DeviceReduce::ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);
// d_out <-- [{5, 0}]
问题如下:
发布于 2020-09-08 14:16:32
如果d_in是指向某个GPU内存(设备)的指针,如何初始化d_out的指针?
使用cudaMalloc,类似于初始化d_in指针的方式。
如果ArgMin()的操作在设备(GPU)中完成,我如何将结果复制到我的CPU?
使用 cudaMemcpy,与把 d_in 数据从主机复制到设备的方式类似,只是这次要把 d_out 数据从设备复制到主机。KeyValuePair 是一个具有 key 和 value 成员的 C++ 对象。
下面是一个完整的例子:
$ cat t37.cu
#include <cub/cub.cuh>
#include <iostream>
// Evaluates a CUDA runtime or CUB call (both return cudaError_t). On failure
// it prints the error together with its source location and makes the
// enclosing function return 1, so it is only meant to be used inside main().
#define CUDA_CHECK(call)                                                    \
    do {                                                                    \
        cudaError_t err_ = (call);                                          \
        if (err_ != cudaSuccess) {                                          \
            std::cerr << "CUDA error " << __FILE__ << ":" << __LINE__       \
                      << ": " << cudaGetErrorString(err_) << std::endl;     \
            return 1;                                                       \
        }                                                                   \
    } while (0)

// Demonstrates cub::DeviceReduce::ArgMin end to end: builds a 32-element host
// array whose unique minimum (2) sits at index 12, copies it to the GPU, runs
// the two-phase ArgMin reduction, copies the {key, value} result back and
// prints it.
//
// Returns 0 on success and 1 if any CUDA/CUB call fails. On the failure path
// device buffers are not explicitly freed; the process exits immediately.
int main(){
    constexpr int num_items = 32;
    constexpr size_t in_bytes = num_items * sizeof(int);
    constexpr size_t out_bytes = sizeof(cub::KeyValuePair<int, int>);

    // Host-side input and result live on the stack, so nothing needs to be
    // deleted on any exit path.
    int h_in[num_items];
    for (int i = 0; i < num_items; i++) h_in[i] = 4;
    h_in[12] = 2; // so we expect our return tuple to be 12,2
    cub::KeyValuePair<int, int> h_out;

    // Device-side input array and single-element output.
    int *d_in = nullptr;
    cub::KeyValuePair<int, int> *d_out = nullptr;
    CUDA_CHECK(cudaMalloc(&d_in, in_bytes));
    CUDA_CHECK(cudaMalloc(&d_out, out_bytes));
    CUDA_CHECK(cudaMemcpy(d_in, h_in, in_bytes, cudaMemcpyHostToDevice));

    // Phase 1: with a null scratch pointer the call only reports how many
    // bytes of temporary storage the reduction needs.
    void *d_temp_storage = nullptr;
    size_t temp_storage_bytes = 0;
    CUDA_CHECK(cub::DeviceReduce::ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items));

    // Phase 2: allocate the scratch buffer and run the actual reduction.
    CUDA_CHECK(cudaMalloc(&d_temp_storage, temp_storage_bytes));
    CUDA_CHECK(cub::DeviceReduce::ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items));

    // The copy back to the host is also where errors raised while the
    // reduction was executing would surface, hence the check.
    CUDA_CHECK(cudaMemcpy(&h_out, d_out, out_bytes, cudaMemcpyDeviceToHost));

    // key holds the index of the minimum, value holds the minimum itself.
    std::cout << "minimum value: " << h_out.value << std::endl;
    std::cout << "index of min: " << h_out.key << std::endl;

    // Release every device allocation made above.
    CUDA_CHECK(cudaFree(d_temp_storage));
    CUDA_CHECK(cudaFree(d_out));
    CUDA_CHECK(cudaFree(d_in));
    return 0;
}
$ nvcc -o t37 t37.cu -arch=sm_35 -std=c++14 -Wno-deprecated-gpu-targets
$ ./t37
minimum value: 2
index of min: 12
$
https://stackoverflow.com/questions/63786851
复制相似问题