I have a multi-process application sharing a single GPU through the CUDA Multi-Process Service (MPS). Each process creates several device arrays, but one of them is large (~5 GB) and constant, so I figured I could allocate it once from a single process and have the other processes read from that memory block using inter-process communication (similar to the CUDA sample shown here).
Following the linked CUDA sample, I tried to implement a simple test program, but I keep hitting an API error: the call to cudaIpcOpenMemHandle reports an invalid argument. I'm posting the code below in the hope that someone can easily spot the cause of the error, or suggest a better way to use the CUDA API to accomplish what I'm trying to do.
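For context, what I'm after boils down to the standard CUDA IPC pattern: export a device allocation as an opaque handle in one process and open that handle in another. A rough sketch (error checking omitted, names illustrative; the handle still has to reach the consumer process through some host-side channel such as a file, a pipe, shared memory, or an MPI message):

// Producer process: export an existing device allocation.
double *d_in;
cudaMalloc((void **)&d_in, N * sizeof(double));
cudaIpcMemHandle_t handle;              // opaque, fixed-size handle
cudaIpcGetMemHandle(&handle, d_in);
// ... transport `handle` to the consumer process ...

// Consumer process: map the producer's allocation into this process.
double *d_in_remote;
cudaIpcOpenMemHandle((void **)&d_in_remote, handle, cudaIpcMemLazyEnablePeerAccess);
// ... launch kernels that read d_in_remote ...
cudaIpcCloseMemHandle(d_in_remote);

The full test program follows: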
#include <stdio.h>
#include <mpi.h>
#include <assert.h>
#include <sys/mman.h>
#define blockSize 128
#define N 1000
#define gpuErr(ans) { gpuAssert((ans), __FILE__, __LINE__); }
__global__ void kernel(double* out, double* in, double val){
    unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int thread_stride = blockDim.x * gridDim.x;
    for (int i=tid; i < N; i+=thread_stride){
        out[i] = in[i]*val;
    }
}
static void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    if (code != cudaSuccess)
    {
        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}
static void error_msg(cudaError_t err, int rank){
    if (err != cudaSuccess){
        printf("RANK %d recvd CUDA error message: %s\n", rank, cudaGetErrorString(err));
        exit(err);
    }
}
void check_access(){
    cudaDeviceProp prop;
    gpuErr(cudaGetDeviceProperties(&prop, 0));
    if (prop.unifiedAddressing)
        printf("> GPU%d = is capable of UVA\n", 0);

    // NOTE: only interested in enabling intra-device peer2peer, so I think this test doesn't matter?
    //int can_access=-1;
    //int num_dev=2;
    //// note, here I was confused, I want the ability to have a process on device 0 access
    //for (peer_dev=0; peer_dev <num_dev, peer_dev++){
    //    int peer_dev=0; // note if peer_dev is 1
    //    gpuErr(cudaDeviceCanAccessPeer(&can_access, 0,peer_dev));
    //    if (can_access)
    //        printf("device 0 has peerdev=%d access\n", peer_dev);
    //    else
    //        printf("device 0 has no peerdev=%d access\n", peer_dev);
    //}
}
int main(){
    MPI_Init(NULL,NULL);
    int size,rank;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank==0)
        check_access();
    gpuErr(cudaSetDevice(0));

    double* out;
    double* in;
    gpuErr(cudaMallocManaged((void **)&out, N*sizeof(double)));

    cudaIpcMemHandle_t * memHand = (cudaIpcMemHandle_t *)
        mmap(NULL, sizeof(cudaIpcMemHandle_t),
             PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, 0);
    assert(MAP_FAILED != memHand);
    memset((void *) memHand, 0, sizeof(cudaIpcMemHandle_t));

    MPI_Barrier(MPI_COMM_WORLD);
    if (rank==0){
        gpuErr(cudaMalloc((void **)&in, N*sizeof(double)));
        gpuErr(cudaIpcGetMemHandle((cudaIpcMemHandle_t *) &memHand[0], (void *)in));
        double * temp = new double[N];
        for (int i=0; i < N; i++)
            temp[i] = 1;
        gpuErr(cudaMemcpy(in, temp, N*sizeof(double), cudaMemcpyHostToDevice));
        delete[] temp;
    }
    MPI_Barrier(MPI_COMM_WORLD);

    // the following call throws a CUDA error: invalid argument
    if (rank >0 )
        gpuErr(cudaIpcOpenMemHandle((void **) &in, memHand[0], cudaIpcMemLazyEnablePeerAccess));
    MPI_Barrier(MPI_COMM_WORLD);

    int numBlocks = (N + blockSize - 1) / blockSize;
    double rank_val=(double) rank;
    kernel<<<numBlocks, blockSize>>>(out, in, rank_val);
    error_msg(cudaGetLastError(), rank);
    gpuErr(cudaDeviceSynchronize());
    MPI_Barrier(MPI_COMM_WORLD);

    // test the kernel results
    double sum = 0;
    for (int i=0; i < N; i++)
        sum += out[i];
    printf("mpirank=%d, comm.size=%d, result=%f\n", rank, size, sum);
    assert(sum==N*rank);

    // cleanup
    if (rank>0)
        cudaIpcCloseMemHandle(in);
    cudaFree(out);
    if (rank==0)
        cudaFree(in);
    return 0;
}

I compiled with:
nvcc -I/usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/include --compiler-options=-march=skylake-avx512 -L/usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib -lmpi ipc_tester.cu

Single-process job output (this looks correct):
$ srun -n1 -c2 ./a.out
> GPU0 = is capable of UVA
mpirank=0, comm.size=1, result=0.000000

Multi-process job output (hits the error in the call to cudaIpcOpenMemHandle):
$ srun -n2 -c2 ./a.out
GPUassert: invalid argument ipc_tester.cu 92

Compute-sanitizer output:
$ srun -n2 -c2 compute-sanitizer ./a.out
========= COMPUTE-SANITIZER
========= COMPUTE-SANITIZER
========= Program hit invalid device context (error 201) on CUDA API call to cuCtxGetDevice.
========= Saved host backtrace up to driver entry point at error
========= Program hit invalid device context (error 201) on CUDA API call to cuCtxGetDevice.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:cuCtxGetDevice [0x155550d083eb]
========= in /usr/common/software/sles15_cgpu/cuda/11.1.1/lib64/compat/libcuda.so.1
========= Host Frame:uct_cuda_base_query_devices [0x15553e03f170]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/ucx/libuct_cuda.so.0
========= Host Frame:cuCtxGetDevice [0x155550d083eb]
========= in /usr/common/software/sles15_cgpu/cuda/11.1.1/lib64/compat/libcuda.so.1
========= Host Frame:uct_md_query_tl_resources [0x15553e6c44c6]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libuct.so.0
========= Host Frame: [0x15553e9095a9]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame: [0x15553e90a7f9]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame: [0x15553e90abfd]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame:ucp_init_version [0x15553e90b7f3]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame:uct_cuda_base_query_devices [0x155546040170]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/ucx/libuct_cuda.so.0
========= Host Frame:mca_pml_ucx_open [0x15553edc7e70]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/openmpi/mca_pml_ucx.so
========= Host Frame:mca_base_framework_components_open [0x15555299ef2d]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libopen-pal.so.40
========= Host Frame: [0x155554472ec7]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libmpi.so.40
========= Host Frame:uct_md_query_tl_resources [0x1555466c54c6]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libuct.so.0
========= Host Frame: [0x15554690a5a9]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame: [0x15554690b7f9]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame: [0x15554690bbfd]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame:mca_base_framework_open [0x1555529a8b31]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libopen-pal.so.40
========= Host Frame:ompi_mpi_init [0x15555447fb5b]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libmpi.so.40
========= Host Frame:MPI_Init [0x15555442dc01]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libmpi.so.40
========= Host Frame:ucp_init_version [0x15554690c7f3]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame: [0x403f04]
========= in /global/cfs/cdirs/lcls/dermen/dulios/./a.out
========= Host Frame:__libc_start_main [0x1555531173ea]
========= in /lib64/libc.so.6
========= Host Frame: [0x403d1a]
========= Host Frame:mca_pml_ucx_open [0x155546dc8e70]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/openmpi/mca_pml_ucx.so
========= in /global/cfs/cdirs/lcls/dermen/dulios/./a.out
=========
========= Host Frame:mca_base_framework_components_open [0x15555299ef2d]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libopen-pal.so.40
========= Host Frame: [0x155554472ec7]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libmpi.so.40
========= Host Frame:mca_base_framework_open [0x1555529a8b31]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libopen-pal.so.40
========= Host Frame:ompi_mpi_init [0x15555447fb5b]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libmpi.so.40
========= Host Frame:MPI_Init [0x15555442dc01]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libmpi.so.40
========= Host Frame: [0x403f04]
========= in /global/cfs/cdirs/lcls/dermen/dulios/./a.out
========= Host Frame:__libc_start_main [0x1555531173ea]
========= in /lib64/libc.so.6
========= Host Frame: [0x403d1a]
========= in /global/cfs/cdirs/lcls/dermen/dulios/./a.out
=========
========= Program hit invalid device context (error 201) on CUDA API call to cuCtxGetDevice.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:cuCtxGetDevice [0x155550d083eb]
========= in /usr/common/software/sles15_cgpu/cuda/11.1.1/lib64/compat/libcuda.so.1
========= Host Frame:uct_cuda_base_query_devices [0x15553e03f170]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/ucx/libuct_cuda.so.0
========= Host Frame:uct_md_query_tl_resources [0x15553e6c44c6]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libuct.so.0
========= Host Frame: [0x15553e9095a9]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame: [0x15553e90a7f9]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame: [0x15553e90abfd]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame:ucp_init_version [0x15553e90b7f3]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame:mca_pml_ucx_open [0x15553edc7e70]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/openmpi/mca_pml_ucx.so
========= Host Frame:mca_base_framework_components_open [0x15555299ef2d]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libopen-pal.so.40
========= Host Frame: [0x155554472ec7]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libmpi.so.40
========= Program hit invalid device context (error 201) on CUDA API call to cuCtxGetDevice.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:mca_base_framework_open [0x1555529a8b31]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libopen-pal.so.40
========= Host Frame:cuCtxGetDevice [0x155550d083eb]
========= in /usr/common/software/sles15_cgpu/cuda/11.1.1/lib64/compat/libcuda.so.1
========= Host Frame:ompi_mpi_init [0x15555447fb5b]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libmpi.so.40
========= Host Frame:uct_cuda_base_query_devices [0x155546040170]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/ucx/libuct_cuda.so.0
========= Host Frame:MPI_Init [0x15555442dc01]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libmpi.so.40
========= Host Frame: [0x403f04]
========= in /global/cfs/cdirs/lcls/dermen/dulios/./a.out
========= Host Frame:uct_md_query_tl_resources [0x1555466c54c6]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libuct.so.0
========= Host Frame: [0x15554690a5a9]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame: [0x15554690b7f9]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame: [0x15554690bbfd]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame:ucp_init_version [0x15554690c7f3]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame:__libc_start_main [0x1555531173ea]
========= in /lib64/libc.so.6
========= Host Frame: [0x403d1a]
========= in /global/cfs/cdirs/lcls/dermen/dulios/./a.out
=========
========= Host Frame:mca_pml_ucx_open [0x155546dc8e70]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/openmpi/mca_pml_ucx.so
========= Host Frame:mca_base_framework_components_open [0x15555299ef2d]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libopen-pal.so.40
========= Host Frame: [0x155554472ec7]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libmpi.so.40
========= Host Frame:mca_base_framework_open [0x1555529a8b31]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libopen-pal.so.40
========= Host Frame:ompi_mpi_init [0x15555447fb5b]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libmpi.so.40
========= Host Frame:MPI_Init [0x15555442dc01]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libmpi.so.40
========= Host Frame: [0x403f04]
========= in /global/cfs/cdirs/lcls/dermen/dulios/./a.out
========= Host Frame:__libc_start_main [0x1555531173ea]
========= in /lib64/libc.so.6
========= Host Frame: [0x403d1a]
========= in /global/cfs/cdirs/lcls/dermen/dulios/./a.out
=========
========= Program hit invalid device context (error 201) on CUDA API call to cuCtxGetDevice.
========= Saved host backtrace up to driver entry point at error
========= Program hit invalid device context (error 201) on CUDA API call to cuCtxGetDevice.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:cuCtxGetDevice [0x155550d083eb]
========= in /usr/common/software/sles15_cgpu/cuda/11.1.1/lib64/compat/libcuda.so.1
========= Host Frame:cuCtxGetDevice [0x155550d083eb]
========= in /usr/common/software/sles15_cgpu/cuda/11.1.1/lib64/compat/libcuda.so.1
========= Host Frame:uct_cuda_base_query_devices [0x15553e03f170]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/ucx/libuct_cuda.so.0
========= Host Frame:uct_cuda_base_query_devices [0x155546040170]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/ucx/libuct_cuda.so.0
========= Host Frame:uct_md_query_tl_resources [0x15553e6c44c6]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libuct.so.0
========= Host Frame: [0x15553e9095a9]
========= Host Frame:uct_md_query_tl_resources [0x1555466c54c6]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libuct.so.0
========= Host Frame: [0x15554690a5a9]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame: [0x15554690b7f9]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame: [0x15554690bbfd]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame: [0x15553e90a7f9]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame: [0x15553e90abfd]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame:ucp_init_version [0x15553e90b7f3]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame:ucp_init_version [0x15554690c7f3]
========= in /usr/common/software/sles15_cgpu/ucx/1.10.1/lib/libucp.so.0
========= Host Frame:mca_pml_ucx_open [0x155546dc8e70]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/openmpi/mca_pml_ucx.so
========= Host Frame:mca_pml_ucx_open [0x15553edc7e70]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/openmpi/mca_pml_ucx.so
========= Host Frame:mca_base_framework_components_open [0x15555299ef2d]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libopen-pal.so.40
========= Host Frame: [0x155554472ec7]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libmpi.so.40
========= Host Frame:mca_base_framework_components_open [0x15555299ef2d]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libopen-pal.so.40
========= Host Frame: [0x155554472ec7]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libmpi.so.40
========= Host Frame:mca_base_framework_open [0x1555529a8b31]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libopen-pal.so.40
========= Host Frame:mca_base_framework_open [0x1555529a8b31]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libopen-pal.so.40
========= Host Frame:ompi_mpi_init [0x15555447fb5b]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libmpi.so.40
========= Host Frame:ompi_mpi_init [0x15555447fb5b]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libmpi.so.40
========= Host Frame:MPI_Init [0x15555442dc01]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libmpi.so.40
========= Host Frame: [0x403f04]
========= Host Frame:MPI_Init [0x15555442dc01]
========= in /usr/common/software/sles15_cgpu/openmpi/4.0.3/gcc/lib/libmpi.so.40
========= Host Frame: [0x403f04]
========= in /global/cfs/cdirs/lcls/dermen/dulios/./a.out
========= in /global/cfs/cdirs/lcls/dermen/dulios/./a.out
========= Host Frame:__libc_start_main [0x1555531173ea]
========= in /lib64/libc.so.6
========= Host Frame: [0x403d1a]
========= in /global/cfs/cdirs/lcls/dermen/dulios/./a.out
=========
========= Host Frame:__libc_start_main [0x1555531173ea]
========= in /lib64/libc.so.6
========= Host Frame: [0x403d1a]
========= in /global/cfs/cdirs/lcls/dermen/dulios/./a.out
=========
========= Program hit invalid argument (error 1) on CUDA API call to cudaIpcOpenMemHandle.
========= Saved host backtrace up to driver entry point at error
========= Host Frame: [0x155550dde1b3]
========= in /usr/common/software/sles15_cgpu/cuda/11.1.1/lib64/compat/libcuda.so.1
========= Host Frame: [0x433fac]
========= in /global/cfs/cdirs/lcls/dermen/dulios/./a.out
========= Host Frame: [0x40412e]
========= in /global/cfs/cdirs/lcls/dermen/dulios/./a.out
========= Host Frame:__libc_start_main [0x1555531173ea]
========= in /lib64/libc.so.6
========= Host Frame: [0x403d1a]
========= in /global/cfs/cdirs/lcls/dermen/dulios/./a.out
=========
GPUassert: invalid argument ipc_tester.cu 92
========= Error: process didn't terminate successfully
========= Target application returned an error
========= ERROR SUMMARY: 4 errors

System info:
$ lsb_release -a
LSB Version: n/a
Distributor ID: SUSE
Description: SUSE Linux Enterprise Server 15 SP2
Release: 15.2
Codename: n/a
$ nvidia-smi
Tue Sep 27 10:05:48 2022
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.102.04 Driver Version: 450.102.04 CUDA Version: 11.1 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 Tesla V100-SXM2... On | 00000000:89:00.0 Off | 0 |
| N/A 34C P0 38W / 300W | 0MiB / 16160MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+

Answered on 2022-09-27 21:02:52:
As pointed out in the comments, memHand was not being set correctly on ranks > 0: the anonymous MAP_SHARED mapping is only shared with child processes forked after the mmap call, and MPI ranks launched by srun are independent processes, so every rank other than 0 read an all-zero handle.
After learning how to broadcast the memory handle with MPI, I arrived at a solution. Applying the following patch produces a working code.
@@ -66,12 +66,7 @@ int main(){
double * in;
gpuErr(cudaMallocManaged((void **)&out, N*sizeof(double)));
- cudaIpcMemHandle_t * memHand = (cudaIpcMemHandle_t *)
- mmap(NULL, sizeof(cudaIpcMemHandle_t),
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, 0);
-
- assert(MAP_FAILED != memHand);
- memset((void *) memHand, 0, sizeof(cudaIpcMemHandle_t));
+ cudaIpcMemHandle_t memHand[1];
MPI_Barrier(MPI_COMM_WORLD);
@@ -87,6 +82,21 @@ int main(){
}
MPI_Barrier(MPI_COMM_WORLD);
+// Broadcast the MPI handle
+// get size of memHandler container needed for broadcast
+ int hand_size[1];
+ if (rank==0)
+ hand_size[0]= sizeof(memHand[0]);
+ MPI_Bcast(&hand_size[0], 1, MPI_INT, 0, MPI_COMM_WORLD);
+
+ // create the char container for memHandler broadcast
+ char memHand_C[hand_size[0]];
+ if (rank==0)
+ memcpy(&memHand_C, &memHand[0], hand_size[0]);
+ MPI_Bcast(&memHand_C, hand_size[0], MPI_BYTE, 0, MPI_COMM_WORLD);
+ if (rank >0)
+ memcpy(&memHand[0], &memHand_C, hand_size[0]);
+
// the following call throws a CUDA error: invalid argument
if (rank >0 )
gpuErr(cudaIpcOpenMemHandle((void **) &in, memHand[0], cudaIpcMemLazyEnablePeerAccess));
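As an aside, since cudaIpcMemHandle_t is a fixed-size struct, the size query and intermediate char buffer above could likely be collapsed into a single broadcast of the handle's raw bytes; a minimal, untested sketch of that variant:

// Rank 0 has already filled memHand[0] via cudaIpcGetMemHandle();
// broadcast the handle bytes so every rank holds an identical copy.
MPI_Bcast(&memHand[0], sizeof(cudaIpcMemHandle_t), MPI_BYTE, 0, MPI_COMM_WORLD);

if (rank > 0)
    gpuErr(cudaIpcOpenMemHandle((void **) &in, memHand[0], cudaIpcMemLazyEnablePeerAccess));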
https://stackoverflow.com/questions/73871402