内容来源于 Stack Overflow,遵循 CC BY-SA 4.0 许可协议进行翻译与使用。IT领域专用引擎提供翻译支持
腾讯云小微IT领域专用引擎提供翻译支持
我对cuda相当陌生,我想使用常量内存的概念,但是在运行代码时遇到了非法的内存访问。
我的内核看起来像这样
__global__ void nonceKernel(int inLen, int shaTermLength, BYTE* outSha1, BYTE* outNonce, int nonceLen, int* finishedFlag, int *mutex, int size) if(!*finishedFlag) return; unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x; bool found = true; BYTE tempNonce[2]; BYTE tempSha1[20]; tempNonce[1]=((tid+size) >> 8) & 0x000000FF; tempNonce[0]=(tid+size) & 0x000000FF; CUDA_SHA1 ctx; cuda_sha1_init(&ctx); //init context cuda_sha1_update(&ctx, device_input_data, inLen); // add input buffer cuda_sha1_update(&ctx, tempNonce, nonceLen); //add nonce cuda_sha1_final(&ctx, tempSha1); //compute sha1 for(int i=0; i<shaTermLength; i++) { if(tempSha1[19 - i] != device_sha1_term[shaTermLength - 1 - i]) found=false; if(found == true) { lock(mutex); memcpy(outSha1, tempSha1, 20); //20 bytes for sha1 memcpy(outNonce, tempNonce, nonceLen); //2 bytes for nonce *finishedFlag = 0; unlock(mutex); }
我的中介功能如下:
cudaError_t nonceWithCuda(int intlen, int shaTermLength, BYTE* outSha1, BYTE* outNonce, int *finishFlag, int nonceLen, int size) BYTE *gpuSha1Out; BYTE *gpuNonceOut; int *gpuFinishedFlag; cudaError_t cudaStatus; int *mutex; cudaStatus= cudaSetDevice(0); if(cudaStatus != cudaSuccess) { fprintf(stderr, "cudaSetDevice failed! Do you have a cuda gpu installed?"); goto Error; cudaStatus=cudaMalloc((void**)&gpuFinishedFlag, 1*sizeof(int)); if(cudaStatus != cudaSuccess) { fprintf(stderr, "cudaMalloc for gpuFinishedFlag failed"); goto Error; cudaStatus=cudaMemcpy(gpuFinishedFlag, finishFlag, sizeof(int), cudaMemcpyHostToDevice); if(cudaStatus!=cudaSuccess) { fprintf(stderr, "cudamemcpy 0 to gpuFinishedFlag failed!"); goto Error; while(*finishFlag) { nonceKernel<<<128, 1024>>>(intlen, shaTermLength, gpuSha1Out, gpuNonceOut, nonceLen, gpuFinishedFlag, mutex, size); size++; cudaStatus=cudaMemcpy(finishFlag, gpuFinishedFlag, sizeof(int), cudaMemcpyDeviceToHost); if(cudaStatus!=cudaSuccess) { fprintf(stderr, "cudaMemcpy from gpuFinishedFlag failed, with code: %s!", cudaGetErrorString(cudaStatus)); goto Error; ...... Error: cudaFree(gpuSha1Out); cudaFree(gpuNonceOut); cudaFree(gpuFinishedFlag); return cudaStatus; }
另外,我声明常量变量如下:
__constant__ BYTE* device_input_data; __constant__ BYTE* device_sha1_term;
其中字节被定义为无符号字符 typedef unsigned char BYTE; 。
typedef unsigned char BYTE;
最后是主要功能。
int main(int argc, char** argv) { size_t input_block_size=5; //bytes int nonceLen=2; int finishedFlag=1; BYTE* inputData = (BYTE*) malloc(input_block_size * sizeof(BYTE)); //input byte buffer inputData[0]=0x23; //# inputData[1]=0x30; //0 inputData[2]=0x42; //B inputData[3]=0x69; //i inputData[4]=0x61; //a BYTE* shaTerm = (BYTE*) malloc(nonceLen * sizeof(BYTE)); shaTerm[0]=0x7E; shaTerm[1]=0x46; int shaTermLength = sizeof(shaTerm)/sizeof(shaTerm[0]);//ouput sha buffer cudaStatus=cudaMemcpyToSymbol(device_input_data, inputData, input_block_size * sizeof(BYTE), 0, cudaMemcpyHostToDevice); fprintf(stderr, "MemcpyToSymbol: %s\n", cudaGetErrorString(cudaStatus)); cudaStatus=cudaMemcpyToSymbol(device_sha1_term, shaTerm, shaTermLength * sizeof(BYTE), 0, cudaMemcpyHostToDevice); fprintf(stderr, "MemcpyToSymbol: %s\n", cudaGetErrorString(cudaStatus)); nonceWithCuda(input_block_size, shaTermLength, outputSha1Buffer, outputNonceBuffer, &finishedFlag, 2, size);
错误发生在nonceWithCuda函数的when中,当我将gpu的值复制回主机时,我指的是这段代码:
cudaStatus=cudaMemcpy(finishFlag, gpuFinishedFlag, sizeof(int), cudaMemcpyDeviceToHost); if(cudaStatus!=cudaSuccess) {