waf

FORK: waf with some random patches
git clone https://git.neptards.moe/neptards/waf.git
Log | Files | Refs | README

test.cu (3713B)


      1 #include <stdio.h>
      2 #include <assert.h>
      3 #include <cuda.h>
      4 
      5 #include "test.h"
      6 
      7 // these macros are really really helpful
      8 #  define CUDA_SAFE_CALL( call) {                                            \
      9     cudaError err = call;                                                    \
     10     if( cudaSuccess != err) {                                                \
     11         fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n",        \
     12                 __FILE__, __LINE__, cudaGetErrorString( err) );              \
     13         exit(EXIT_FAILURE);                                                  \
     14     } }
     15 
     16 #define CHECKLASTERROR   {                                                 \
     17 	cudaError_t err = cudaGetLastError();                                    \
     18 	if (err != cudaSuccess) {                                                \
     19 		fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n",        \
     20                 __FILE__, __LINE__, cudaGetErrorString( err) );              \
     21         exit(EXIT_FAILURE);                                                  \
     22 	} }
     23 
     24 
     25 #define SIZ 128
     26 
     27 __global__ void truc(unsigned int * buf) {
     28 	if (threadIdx.x < SIZ) {
     29 		buf[threadIdx.x] = buf[threadIdx.x] << 5;
     30 	}
     31 	__syncthreads();
     32 }
     33 
     34 int testcuda()
     35 {
     36 	unsigned int* foo = (unsigned int*) malloc(SIZ * sizeof(unsigned int));
     37 	for (int x = 0; x < SIZ; ++x) {
     38 		foo[x] = 1;
     39 	}
     40 
     41 	unsigned int * recf = NULL;
     42 	CUDA_SAFE_CALL( cudaMalloc((void **) &recf, SIZ * sizeof(unsigned int)) );
     43 	CUDA_SAFE_CALL(cudaMemcpy(recf, foo,  SIZ * sizeof(unsigned int), cudaMemcpyHostToDevice));
     44 	truc<<<1, SIZ>>>(recf);
     45 	CHECKLASTERROR
     46 	CUDA_SAFE_CALL(cudaMemcpy(foo, recf, SIZ * sizeof(unsigned int), cudaMemcpyDeviceToHost));
     47 	printf("2^5 -> %u\n", foo[5]);
     48 
     49 	CUDA_SAFE_CALL(cudaFree(recf));
     50 
     51 /*
     52 int deviceCount;
     53 cudaGetDeviceCount(&deviceCount);
     54 printf("device count %d\n", deviceCount);
     55 
     56 cudaDeviceProp dP;
     57 cudaGetDeviceProperties(&dP, NULL);
     58 //printf("Max threads per block: %d\n", dP.maxThreadsPerBlock);
     59 //printf("Max Threads DIM: %d x %d x %d\n", dP.maxThreadsDim[0], dP.maxThreadsDim[1], dP.maxThreadsDim[2]);
     60 //printf("Max Grid Size: %d x %d x %d\n", dP.maxGridSize[0], dP.maxGridSize[1], dP.maxGridSize[2]);
     61 
     62 cudaDeviceProp* pDeviceProp = &dP;
     63 
     64      printf( "\nDevice Name \t - %s ", pDeviceProp->name );  
     65      printf( "\n**************************************");  
     66      printf( "\nTotal Global Memory\t\t -%d KB", pDeviceProp->totalGlobalMem/1024 );  
     67      printf( "\nShared memory available per block \t - %d KB", pDeviceProp->sharedMemPerBlock/1024 );  
     68      printf( "\nNumber of registers per thread block \t - %d", pDeviceProp->regsPerBlock );  
     69      printf( "\nWarp size in threads \t - %d", pDeviceProp->warpSize );  
     70      printf( "\nMemory Pitch \t - %d bytes", pDeviceProp->memPitch );  
     71      printf( "\nMaximum threads per block \t - %d", pDeviceProp->maxThreadsPerBlock );  
     72      printf( "\nMaximum Thread Dimension (block) \t - %d %d %d", pDeviceProp->maxThreadsDim[0], pDeviceProp->maxThreadsDim[1], pDeviceProp->maxThreadsDim[2] );  
     73      printf( "\nMaximum Thread Dimension (grid) \t - %d %d %d", pDeviceProp->maxGridSize[0], pDeviceProp->maxGridSize[1], pDeviceProp->maxGridSize[2] );  
     74      printf( "\nTotal constant memory \t - %d bytes", pDeviceProp->totalConstMem );  
     75      printf( "\nCUDA ver \t - %d.%d", pDeviceProp->major, pDeviceProp->minor );  
     76      printf( "\nClock rate \t - %d KHz", pDeviceProp->clockRate );  
     77      printf( "\nTexture Alignment \t - %d bytes", pDeviceProp->textureAlignment );  
     78      printf( "\nDevice Overlap \t - %s", pDeviceProp-> deviceOverlap?"Allowed":"Not Allowed" );  
     79      printf( "\nNumber of Multi processors \t - %d\n", pDeviceProp->multiProcessorCount );  
     80 */
     81 
     82 	return 0;
     83 }
     84