test.cu (3713B)
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#include <cuda.h>

#include "test.h"

/*
 * Evaluate a CUDA runtime call and abort the process with a diagnostic
 * (file, line, CUDA error string) if it does not return cudaSuccess.
 * Wrapped in do { } while (0) so it behaves as a single statement, e.g.
 * inside an unbraced if/else.
 */
#define CUDA_SAFE_CALL(call)                                                  \
    do {                                                                      \
        cudaError_t err_ = (call);                                            \
        if (cudaSuccess != err_) {                                            \
            fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n",     \
                    __FILE__, __LINE__, cudaGetErrorString(err_));            \
            exit(EXIT_FAILURE);                                               \
        }                                                                     \
    } while (0)

/*
 * Fetch (and clear) the last CUDA runtime error, aborting the process with
 * a diagnostic if one is pending. Intended to be placed right after a
 * kernel launch, since launches do not return an error code themselves.
 */
#define CHECKLASTERROR                                                        \
    do {                                                                      \
        cudaError_t err_ = cudaGetLastError();                                \
        if (err_ != cudaSuccess) {                                            \
            fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n",     \
                    __FILE__, __LINE__, cudaGetErrorString(err_));            \
            exit(EXIT_FAILURE);                                               \
        }                                                                     \
    } while (0)

/* Number of unsigned ints processed by the test (also the block size used). */
#define SIZ 128

/*
 * Shift each of the first SIZ elements of `buf` left by 5 bits, in place.
 *
 * buf: device pointer to at least SIZ unsigned ints.
 *
 * One thread handles one element. The element index is the flat global
 * thread index, so the result is the same whether the SIZ threads are
 * launched as one block or spread over several; threads whose index is
 * >= SIZ do nothing. No shared memory is used, so no barrier is needed.
 */
__global__ void truc(unsigned int *buf)
{
    const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < SIZ) {
        buf[idx] = buf[idx] << 5;
    }
}

/*
 * Smoke test for the CUDA toolchain/device: fills a host array with 1s,
 * copies it to the device, runs `truc` (x << 5) on it, copies the result
 * back and prints element 5.
 *
 * Returns 0 on success and 1 if the host buffer cannot be allocated.
 * Any CUDA runtime error terminates the process via exit(EXIT_FAILURE)
 * (see CUDA_SAFE_CALL / CHECKLASTERROR).
 */
int testcuda()
{
    const size_t bytes = SIZ * sizeof(unsigned int);

    unsigned int *foo = (unsigned int *) malloc(bytes);
    if (foo == NULL) {
        fprintf(stderr, "testcuda: unable to allocate %lu bytes of host memory.\n",
                (unsigned long) bytes);
        return 1;
    }
    for (int x = 0; x < SIZ; ++x) {
        foo[x] = 1;
    }

    unsigned int *recf = NULL;
    CUDA_SAFE_CALL(cudaMalloc((void **) &recf, bytes));
    CUDA_SAFE_CALL(cudaMemcpy(recf, foo, bytes, cudaMemcpyHostToDevice));

    /* Single block of SIZ threads: one thread per element. */
    truc<<<1, SIZ>>>(recf);
    CHECKLASTERROR;

    /* Blocking copy: also waits for the kernel above to finish. */
    CUDA_SAFE_CALL(cudaMemcpy(foo, recf, bytes, cudaMemcpyDeviceToHost));
    printf("2^5 -> %u\n", foo[5]);

    CUDA_SAFE_CALL(cudaFree(recf));
    free(foo); /* the host buffer was previously leaked */

    // NOTE(review): disabled device-property dump, kept for reference.
    // Before re-enabling: check the return codes, pass a device index
    // (e.g. 0) instead of NULL to cudaGetDeviceProperties, and print the
    // size_t fields (totalGlobalMem, sharedMemPerBlock, memPitch,
    // totalConstMem, textureAlignment) with a size_t-compatible format
    // rather than %d.
    /*
    int deviceCount;
    cudaGetDeviceCount(&deviceCount);
    printf("device count %d\n", deviceCount);

    cudaDeviceProp dP;
    cudaGetDeviceProperties(&dP, NULL);
    //printf("Max threads per block: %d\n", dP.maxThreadsPerBlock);
    //printf("Max Threads DIM: %d x %d x %d\n", dP.maxThreadsDim[0], dP.maxThreadsDim[1], dP.maxThreadsDim[2]);
    //printf("Max Grid Size: %d x %d x %d\n", dP.maxGridSize[0], dP.maxGridSize[1], dP.maxGridSize[2]);

    cudaDeviceProp* pDeviceProp = &dP;

    printf( "\nDevice Name \t - %s ", pDeviceProp->name );
    printf( "\n**************************************");
    printf( "\nTotal Global Memory\t\t -%d KB", pDeviceProp->totalGlobalMem/1024 );
    printf( "\nShared memory available per block \t - %d KB", pDeviceProp->sharedMemPerBlock/1024 );
    printf( "\nNumber of registers per thread block \t - %d", pDeviceProp->regsPerBlock );
    printf( "\nWarp size in threads \t - %d", pDeviceProp->warpSize );
    printf( "\nMemory Pitch \t - %d bytes", pDeviceProp->memPitch );
    printf( "\nMaximum threads per block \t - %d", pDeviceProp->maxThreadsPerBlock );
    printf( "\nMaximum Thread Dimension (block) \t - %d %d %d", pDeviceProp->maxThreadsDim[0], pDeviceProp->maxThreadsDim[1], pDeviceProp->maxThreadsDim[2] );
    printf( "\nMaximum Thread Dimension (grid) \t - %d %d %d", pDeviceProp->maxGridSize[0], pDeviceProp->maxGridSize[1], pDeviceProp->maxGridSize[2] );
    printf( "\nTotal constant memory \t - %d bytes", pDeviceProp->totalConstMem );
    printf( "\nCUDA ver \t - %d.%d", pDeviceProp->major, pDeviceProp->minor );
    printf( "\nClock rate \t - %d KHz", pDeviceProp->clockRate );
    printf( "\nTexture Alignment \t - %d bytes", pDeviceProp->textureAlignment );
    printf( "\nDevice Overlap \t - %s", pDeviceProp-> deviceOverlap?"Allowed":"Not Allowed" );
    printf( "\nNumber of Multi processors \t - %d\n", pDeviceProp->multiProcessorCount );
    */

    return 0;
}