cuda-samples
cuda-samples copied to clipboard
Does CUDA support allocating memory in one thread and using cuMemGetAddressRange in another thread to query that buffer's address range?
In one process.
- thread A
cudaMalloc(&buffer, 100);
- thread B
void *base_address = (void*)buffer;
size_t alloc_length = 100;
cuMemGetAddressRange((CUdeviceptr*)&base_address, &alloc_length, (CUdeviceptr)buffer);
Is it correct to use cudaMalloc and cuMemGetAddressRange in the way shown above?
The code below reports an error:
#include <cuda.h>
#include <cuda_runtime.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <pthread.h>
/*
* $ gcc -DEXTRA x.c -lcuda -lcudart -lpthread -o mytest
* $ ./mytest
* cuda
* ccuMemGetAddressRange(0x7f8c4da00000) error: invalid device context
* thread exit
* read len is 5, dst_buf 123450000
*
* $ gcc x.c -lcuda -lcudart -lpthread -o mytest
* $ ./mytest
* cuda
* read len is 5, dst_buf 123450000
*/
/* Buffer pointer shared by main() and the worker thread: main() sets it with
 * cudaMalloc() and frees it with cudaFree(); thread_start() only reads it. */
void *buffer = NULL;
void uct_cuda_base_mem_query(const void *address, size_t length)
{
#define UCT_CUDA_MEM_QUERY_NUM_ATTRS 3
CUmemorytype cuda_mem_mype = (CUmemorytype)0;
uint32_t is_managed = 0;
unsigned value = 1;
CUdevice cuda_device = -1;
void *base_address = (void*)address;
size_t alloc_length = length;
CUpointer_attribute attr_type[UCT_CUDA_MEM_QUERY_NUM_ATTRS];
void *attr_data[UCT_CUDA_MEM_QUERY_NUM_ATTRS];
const char *cu_err_str;
CUresult cu_err;
attr_type[0] = CU_POINTER_ATTRIBUTE_MEMORY_TYPE;
attr_data[0] = &cuda_mem_mype;
attr_type[1] = CU_POINTER_ATTRIBUTE_IS_MANAGED;
attr_data[1] = &is_managed;
attr_type[2] = CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL;
attr_data[2] = &cuda_device;
cu_err = cuPointerGetAttributes(UCT_CUDA_MEM_QUERY_NUM_ATTRS, attr_type, attr_data, (CUdeviceptr)address);
if ((cu_err != CUDA_SUCCESS) || (cuda_mem_mype != CU_MEMORYTYPE_DEVICE)) {
printf("address not recognized\n");
return;
}
if (is_managed) {
printf("cuda managed\n");
} else {
printf("cuda\n");
/* Synchronize for DMA */
cu_err = cuPointerSetAttribute(&value,
CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,
(CUdeviceptr)address);
if (cu_err != CUDA_SUCCESS) {
cuGetErrorString(cu_err, &cu_err_str);
printf("cuPointerSetAttribute(%p) error: %s\n", address, cu_err_str);
}
}
cu_err = cuMemGetAddressRange((CUdeviceptr*)&base_address, &alloc_length, (CUdeviceptr)address);
if (cu_err != CUDA_SUCCESS) {
cuGetErrorString(cu_err, &cu_err_str);
printf("ccuMemGetAddressRange(%p) error: %s\n", address, cu_err_str);
}
}
/* pthread entry point: runs the memory query on the global buffer from the
 * new thread, announces completion and returns no result. */
void *thread_start(void *arg)
{
    (void)arg; /* the thread takes no input */

    uct_cuda_base_mem_query(buffer, 100);
    fputs("thread exit\n", stdout);

    return NULL;
}
/* Run thread_start() on a new thread with default attributes and wait for it
 * to finish. Any pthread failure is reported on stdout and ends the test
 * early, so an uninitialized attribute object or thread id is never used. */
void test_thread()
{
    pthread_attr_t attr;
    pthread_t thread_id;
    void *res;
    int ret;

    if (pthread_attr_init(&attr)) {
        printf("init thread attr failed\n");
        return;
    }

    ret = pthread_create(&thread_id, &attr, &thread_start, NULL);
    /* The attribute object is no longer needed once creation was attempted. */
    pthread_attr_destroy(&attr);
    if (ret != 0) {
        printf("create thread failed: %d\n", ret);
        return;
    }

    ret = pthread_join(thread_id, &res);
    if (ret != 0) {
        printf("join thread failed: %d\n", ret);
    }
}
void main() {
char cpu_buf[10] = "123456789";
char dst_buf[10] = "000000000";
int cerr,len = 5;
cerr = cudaSetDevice(0);
if(cerr != cudaSuccess) {
printf("cuda set device failed!");
return;
}
cerr = cudaMalloc(&buffer, 100);
if(cerr != cudaSuccess) {
printf("cuda memory alloc failed!");
return;
}
#if defined(EXTRA)
test_thread();
#else
uct_cuda_base_mem_query(buffer, 100);
#endif
cudaMemcpy(buffer, cpu_buf, 5, cudaMemcpyDefault);
cudaMemcpy(dst_buf, buffer, 5, cudaMemcpyDefault);
printf("read len is %d, dst_buf %s\n", len, dst_buf);
out:
cudaFree(buffer);
}