cuda-samples icon indicating copy to clipboard operation
cuda-samples copied to clipboard

Does CUDA support allocating memory in one thread and calling cuMemGetAddressRange from another thread to check that buffer's range?

Open changchengx opened this issue 4 years ago • 2 comments

changchengx avatar Dec 16 '21 08:12 changchengx

In one process.

  1. thread A
cudaMalloc(&buffer, 100);
  2. thread B
void *base_address  = (void*)buffer;
size_t alloc_length  = 100;
cuMemGetAddressRange((CUdeviceptr*)&base_address, &alloc_length, (CUdeviceptr)buffer);

Is it valid to use cudaMalloc and cuMemGetAddressRange across threads as shown above?

changchengx avatar Dec 16 '21 08:12 changchengx

The code below reports an error:

#include <cuda.h>
#include <cuda_runtime.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <pthread.h>

/*
 * $ gcc -DEXTRA x.c -lcuda -lcudart -lpthread -o mytest
 * $ ./mytest
 * cuda
 * ccuMemGetAddressRange(0x7f8c4da00000) error: invalid device context
 * thread exit
 * read len is 5, dst_buf 123450000
 *
 * $ gcc x.c -lcuda -lcudart -lpthread -o mytest
 * $ ./mytest
 * cuda
 * read len is 5, dst_buf 123450000
 */

/* Allocation shared across the program: main() allocates and frees it, and
 * thread_start() passes it to uct_cuda_base_mem_query(). */
void *buffer = NULL;

void uct_cuda_base_mem_query(const void *address, size_t length)
{
#define UCT_CUDA_MEM_QUERY_NUM_ATTRS 3
    CUmemorytype cuda_mem_mype = (CUmemorytype)0;
    uint32_t is_managed        = 0;
    unsigned value             = 1;
    CUdevice cuda_device       = -1;
    void *base_address         = (void*)address;
    size_t alloc_length        = length;
    CUpointer_attribute attr_type[UCT_CUDA_MEM_QUERY_NUM_ATTRS];
    void *attr_data[UCT_CUDA_MEM_QUERY_NUM_ATTRS];
    const char *cu_err_str;
    CUresult cu_err;

    attr_type[0] = CU_POINTER_ATTRIBUTE_MEMORY_TYPE;
    attr_data[0] = &cuda_mem_mype;
    attr_type[1] = CU_POINTER_ATTRIBUTE_IS_MANAGED;
    attr_data[1] = &is_managed;
    attr_type[2] = CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL;
    attr_data[2] = &cuda_device;

    cu_err = cuPointerGetAttributes(UCT_CUDA_MEM_QUERY_NUM_ATTRS, attr_type, attr_data, (CUdeviceptr)address);
    if ((cu_err != CUDA_SUCCESS) || (cuda_mem_mype != CU_MEMORYTYPE_DEVICE)) {
        printf("address not recognized\n");
        return;
    }

    if (is_managed) {
        printf("cuda managed\n");
    } else {
        printf("cuda\n");

        /* Synchronize for DMA */
        cu_err = cuPointerSetAttribute(&value,
                                       CU_POINTER_ATTRIBUTE_SYNC_MEMOPS,
                                       (CUdeviceptr)address);
        if (cu_err != CUDA_SUCCESS) {
            cuGetErrorString(cu_err, &cu_err_str);
            printf("cuPointerSetAttribute(%p) error: %s\n", address, cu_err_str);
        }
    }

    cu_err = cuMemGetAddressRange((CUdeviceptr*)&base_address, &alloc_length, (CUdeviceptr)address);
    if (cu_err != CUDA_SUCCESS) {
        cuGetErrorString(cu_err, &cu_err_str);
        printf("ccuMemGetAddressRange(%p) error: %s\n", address, cu_err_str);
    }
}

/*
 * pthread entry point: runs the memory query on the global buffer, prints
 * "thread exit" and returns NULL as the thread result.
 */
void *thread_start(void *arg)
{
    /* The thread argument is not used; the query targets the global buffer. */
    (void)arg;

    uct_cuda_base_mem_query(buffer, 100);
    printf("thread exit\n");

    return NULL;
}

/*
 * Run thread_start() on a freshly created thread and wait for it to finish.
 *
 * Every pthread call is checked: on failure a message is printed and the
 * function returns, so the thread attribute object and thread id are never
 * used unless they were successfully initialised.
 */
void test_thread()
{
    pthread_attr_t attr;
    pthread_t thread_id;
    void *res = NULL;
    int err;

    err = pthread_attr_init(&attr);
    if (err != 0) {
        printf("init thread attr failed\n");
        return;
    }

    err = pthread_create(&thread_id, &attr, &thread_start, NULL);

    /* The attribute object is no longer needed once creation was attempted. */
    pthread_attr_destroy(&attr);

    if (err != 0) {
        printf("create thread failed: %d\n", err);
        return;
    }

    err = pthread_join(thread_id, &res);
    if (err != 0) {
        printf("join thread failed: %d\n", err);
    }
}

void main() {
    char cpu_buf[10] = "123456789";
    char dst_buf[10] = "000000000";
    int cerr,len = 5;

    cerr = cudaSetDevice(0);
    if(cerr != cudaSuccess) {
        printf("cuda set device failed!");
        return;
    }

    cerr = cudaMalloc(&buffer, 100);
    if(cerr != cudaSuccess) {
        printf("cuda memory alloc failed!");
        return;
    }

#if defined(EXTRA)
    test_thread();
#else
    uct_cuda_base_mem_query(buffer, 100);
#endif

    cudaMemcpy(buffer, cpu_buf, 5, cudaMemcpyDefault);
    cudaMemcpy(dst_buf, buffer, 5, cudaMemcpyDefault);
    printf("read len is %d, dst_buf %s\n", len, dst_buf);

out:
    cudaFree(buffer);
}

changchengx avatar Dec 16 '21 08:12 changchengx