目录
- 前言
- 核函数
- 一维
- 二维
- 三维
- 结果分析
前言
所有的代码下载链接:code。以下代码展示了如何在 CUDA 中打印网格和线程的索引信息。代码包括一维、二维和三维的网格和块的设置,并定义了多个内核函数来输出当前的索引信息。
核函数
- 打印线程索引
/**
 * Debug kernel: every launched thread prints one line containing its block
 * index and its thread index within the block.
 *
 * Both index triples are printed in (z, y, x) order. The kernel takes no
 * arguments and writes no memory; its only effect is device-side printf.
 */
__global__ void print_idx_kernel() {
    // Snapshot the built-in index vectors so the printf call reads cleanly.
    const uint3 blk = blockIdx;
    const uint3 thr = threadIdx;

    printf("block idx: (%3d, %3d, %3d), thread idx: (%3d, %3d, %3d)\n",
           blk.z, blk.y, blk.x,
           thr.z, thr.y, thr.x);
}
- 打印网格和块的维度
/**
 * Debug kernel: every launched thread prints one line containing the grid
 * dimensions and the block dimensions of the current launch.
 *
 * Both dimension triples are printed in (z, y, x) order. Because every
 * thread executes the printf, the same line appears once per thread.
 */
__global__ void print_dim_kernel() {
    // Snapshot the built-in dimension vectors so the printf call reads cleanly.
    const dim3 grid = gridDim;
    const dim3 block = blockDim;

    printf("grid dimension: (%3d, %3d, %3d), block dimension: (%3d, %3d, %3d)\n",
           grid.z, grid.y, grid.x,
           block.z, block.y, block.x);
}
- 打印每个块的线程索引
/**
 * Debug kernel: every launched thread prints its block index (z, y, x) and
 * its flattened (linear) index inside its own block.
 *
 * The linear index is computed with x varying fastest, then y, then z:
 *     index = z * (blockDim.x * blockDim.y) + y * blockDim.x + x
 */
__global__ void print_thread_idx_per_block_kernel() {
    // Flatten the 3D thread index into a single offset within the block.
    int index = threadIdx.z * blockDim.x * blockDim.y
              + threadIdx.y * blockDim.x
              + threadIdx.x;

    printf("block idx: (%3d, %3d, %3d), thread idx: %3d\n",
           blockIdx.z, blockIdx.y, blockIdx.x,
           index);
}
- 打印每个线程在整个网格中的全局索引
/**
 * Debug kernel: every launched thread prints three flattened indices:
 *   - the linear index of its block within the grid,
 *   - its linear index within that block,
 *   - its linear index across the whole grid.
 *
 * Both flattenings use x fastest, then y, then z. The grid-wide index is
 *     index = bIndex * (threads per block) + tIndex
 */
__global__ void print_thread_idx_per_grid_kernel() {
    // Number of threads in one block.
    int bSize = blockDim.z * blockDim.y * blockDim.x;

    // Linear index of this block within the grid.
    int bIndex = blockIdx.z * gridDim.x * gridDim.y
               + blockIdx.y * gridDim.x
               + blockIdx.x;

    // Linear index of this thread within its block.
    int tIndex = threadIdx.z * blockDim.x * blockDim.y
               + threadIdx.y * blockDim.x
               + threadIdx.x;

    // Linear index of this thread across the whole grid.
    int index = bIndex * bSize + tIndex;

    printf("block idx: %3d, thread idx in block: %3d, thread idx: %3d\n",
           bIndex, tIndex, index);
}
- 打印坐标
/**
 * Debug kernel: every launched thread prints its block index (z, y, x), its
 * flattened index inside the block, and its global coordinate (x, y, z).
 *
 * Note the ordering difference in the output line: the block index is
 * printed as (z, y, x) while the coordinate is printed as (x, y, z).
 * No bounds check is performed; every launched thread prints.
 */
__global__ void print_cord_kernel() {
    // Linear index of this thread within its block (x fastest, then y, then z).
    int index = threadIdx.z * blockDim.x * blockDim.y
              + threadIdx.y * blockDim.x
              + threadIdx.x;

    // Global coordinate of this thread along each axis.
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    int z = blockIdx.z * blockDim.z + threadIdx.z;

    printf("block idx: (%3d, %3d, %3d), thread idx: %3d, cord: (%3d, %3d, %3d)\n",
           blockIdx.z, blockIdx.y, blockIdx.x,
           index, x, y, z);
}
一维
`cudaDeviceSynchronize()` 不可缺少,它的作用是让 CPU 端等待 GPU 端执行完成(即完成同步)。核函数的启动是异步的:主函数在 CPU 上调用核函数之后,不会等待 GPU 全部执行完毕并返回结果,而是立即继续往下执行。因此需要加上这个同步函数,否则程序可能在 GPU 的输出返回之前就已经结束,运行可执行文件时会得到空的结果。
代码
/**
 * Demo for a 1D launch: 8 elements covered by blocks of 4 threads.
 *
 * Prints the launch configuration on the host, launches an index-printing
 * kernel, and then blocks until the device has finished so that the
 * device-side printf output is produced before the function returns.
 * CUDA errors from the launch or the synchronization are reported on stderr.
 */
void print_one_dim() {
    const int inputSize = 8;
    const int threadsPerBlock = 4;
    // Ceil-divide so the grid still covers the input if it is not a
    // multiple of the block size.
    const int blocksPerGrid = (inputSize + threadsPerBlock - 1) / threadsPerBlock;

    dim3 block(threadsPerBlock);
    dim3 grid(blocksPerGrid);

    printf("grid dimension: %d, block dimension: %d,\n", grid.x, block.x);

    // Any of the printing kernels in this file can be substituted here, e.g.
    // print_idx_kernel, print_dim_kernel, print_thread_idx_per_block_kernel.
    print_thread_idx_per_grid_kernel<<<grid, block>>>();

    // A kernel launch does not return an error itself; query it explicitly.
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "kernel launch failed: %s\n", cudaGetErrorString(err));
        return;
    }

    // The launch is asynchronous: wait for the device before returning.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize failed: %s\n", cudaGetErrorString(err));
    }
}
二维
代码
/**
 * Demo for a 2D launch: a 4x4 input covered by 2x2-thread blocks.
 *
 * Prints the launch configuration on the host in (y, x) order, launches an
 * index-printing kernel, and then blocks until the device has finished so
 * that the device-side printf output is produced before the function
 * returns. CUDA errors from the launch or the synchronization are reported
 * on stderr.
 */
void print_two_dim() {
    const int inputWidth = 4;
    const int threadsPerSide = 2;
    // Ceil-divide so the grid still covers the input if the width is not a
    // multiple of the block side.
    const int blocksPerSide = (inputWidth + threadsPerSide - 1) / threadsPerSide;

    dim3 block(threadsPerSide, threadsPerSide);
    dim3 grid(blocksPerSide, blocksPerSide);

    printf("grid dimension: (%d, %d), block dimension: (%d, %d)\n",
           grid.y, grid.x, block.y, block.x);

    // Any of the printing kernels in this file can be substituted here, e.g.
    // print_idx_kernel, print_dim_kernel, print_cord_kernel.
    print_thread_idx_per_grid_kernel<<<grid, block>>>();

    // A kernel launch does not return an error itself; query it explicitly.
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "kernel launch failed: %s\n", cudaGetErrorString(err));
        return;
    }

    // The launch is asynchronous: wait for the device before returning.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize failed: %s\n", cudaGetErrorString(err));
    }
}
三维
代码
/**
 * Demo for a 3D launch: a 3x3x3 volume covered by 2x2x2-thread blocks.
 *
 * The grid size is ceil-divided per axis, giving a 2x2x2 grid, i.e. 4
 * threads per axis. The launched threads therefore extend past the 3-wide
 * volume; print_cord_kernel has no bounds check, so every thread prints.
 *
 * Prints the launch configuration on the host in (z, y, x) order, launches
 * print_cord_kernel, and then blocks until the device has finished so that
 * the device-side printf output is produced before the function returns.
 * CUDA errors from the launch or the synchronization are reported on stderr.
 */
void print_three_dim() {
    const int depth = 3;
    const int height = 3;
    const int width = 3;
    const int threadsPerSide = 2;

    dim3 block(threadsPerSide, threadsPerSide, threadsPerSide);
    // Round each axis up so partial blocks at the edges are still launched.
    dim3 grid((width + threadsPerSide - 1) / threadsPerSide,
              (height + threadsPerSide - 1) / threadsPerSide,
              (depth + threadsPerSide - 1) / threadsPerSide);

    printf("grid dimension: (%d, %d, %d), block dimension: (%d, %d, %d)\n",
           grid.z, grid.y, grid.x,
           block.z, block.y, block.x);

    // Other printing kernels in this file can be substituted here.
    print_cord_kernel<<<grid, block>>>();

    // A kernel launch does not return an error itself; query it explicitly.
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "kernel launch failed: %s\n", cudaGetErrorString(err));
        return;
    }

    // The launch is asynchronous: wait for the device before returning.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize failed: %s\n", cudaGetErrorString(err));
    }
}
主函数
这里可以自行选择调用 print_one_dim、print_two_dim、print_three_dim,来测试不同网格和块的情况;也可以在这些函数中自行组合上面定义的核函数,来测试所有情况。
/**
 * Entry point: runs one of the grid/block printing demos.
 *
 * Exactly one demo is enabled at a time; uncomment another call to try a
 * different dimensionality.
 */
int main() {
    // Choose which dimensionality to print.
    // print_one_dim();
    // print_two_dim();
    print_three_dim();
    return 0;
}
结果分析
这只是一个很小的 .cu 代码,所以我直接使用了笔记(点击代码链接可以看到)中的第一种方法进行编译。
在当前代码目录下打开终端,运行:
nvcc grid_block_123D.cu -o test
./test
得到结果
```txt
grid dimension: (2, 2, 2), block dimension: (2, 2, 2)
block idx: ( 1, 0, 1), thread idx: 0, cord: ( 2, 0, 2)
block idx: ( 1, 0, 1), thread idx: 1, cord: ( 3, 0, 2)
block idx: ( 1, 0, 1), thread idx: 2, cord: ( 2, 1, 2)
block idx: ( 1, 0, 1), thread idx: 3, cord: ( 3, 1, 2)
block idx: ( 1, 0, 1), thread idx: 4, cord: ( 2, 0, 3)
block idx: ( 1, 0, 1), thread idx: 5, cord: ( 3, 0, 3)
block idx: ( 1, 0, 1), thread idx: 6, cord: ( 2, 1, 3)
block idx: ( 1, 0, 1), thread idx: 7, cord: ( 3, 1, 3)
block idx: ( 0, 1, 0), thread idx: 0, cord: ( 0, 2, 0)
block idx: ( 0, 1, 0), thread idx: 1, cord: ( 1, 2, 0)
block idx: ( 0, 1, 0), thread idx: 2, cord: ( 0, 3, 0)
block idx: ( 0, 1, 0), thread idx: 3, cord: ( 1, 3, 0)
block idx: ( 0, 1, 0), thread idx: 4, cord: ( 0, 2, 1)
block idx: ( 0, 1, 0), thread idx: 5, cord: ( 1, 2, 1)
block idx: ( 0, 1, 0), thread idx: 6, cord: ( 0, 3, 1)
block idx: ( 0, 1, 0), thread idx: 7, cord: ( 1, 3, 1)
block idx: ( 1, 0, 0), thread idx: 0, cord: ( 0, 0, 2)
block idx: ( 1, 0, 0), thread idx: 1, cord: ( 1, 0, 2)
block idx: ( 1, 0, 0), thread idx: 2, cord: ( 0, 1, 2)
block idx: ( 1, 0, 0), thread idx: 3, cord: ( 1, 1, 2)
block idx: ( 1, 0, 0), thread idx: 4, cord: ( 0, 0, 3)
block idx: ( 1, 0, 0), thread idx: 5, cord: ( 1, 0, 3)
block idx: ( 1, 0, 0), thread idx: 6, cord: ( 0, 1, 3)
block idx: ( 1, 0, 0), thread idx: 7, cord: ( 1, 1, 3)
block idx: ( 0, 0, 1), thread idx: 0, cord: ( 2, 0, 0)
block idx: ( 0, 0, 1), thread idx: 1, cord: ( 3, 0, 0)
block idx: ( 0, 0, 1), thread idx: 2, cord: ( 2, 1, 0)
block idx: ( 0, 0, 1), thread idx: 3, cord: ( 3, 1, 0)
block idx: ( 0, 0, 1), thread idx: 4, cord: ( 2, 0, 1)
block idx: ( 0, 0, 1), thread idx: 5, cord: ( 3, 0, 1)
block idx: ( 0, 0, 1), thread idx: 6, cord: ( 2, 1, 1)
block idx: ( 0, 0, 1), thread idx: 7, cord: ( 3, 1, 1)
block idx: ( 1, 1, 1), thread idx: 0, cord: ( 2, 2, 2)
block idx: ( 1, 1, 1), thread idx: 1, cord: ( 3, 2, 2)
block idx: ( 1, 1, 1), thread idx: 2, cord: ( 2, 3, 2)
block idx: ( 1, 1, 1), thread idx: 3, cord: ( 3, 3, 2)
block idx: ( 1, 1, 1), thread idx: 4, cord: ( 2, 2, 3)
block idx: ( 1, 1, 1), thread idx: 5, cord: ( 3, 2, 3)
block idx: ( 1, 1, 1), thread idx: 6, cord: ( 2, 3, 3)
block idx: ( 1, 1, 1), thread idx: 7, cord: ( 3, 3, 3)
block idx: ( 0, 1, 1), thread idx: 0, cord: ( 2, 2, 0)
block idx: ( 0, 1, 1), thread idx: 1, cord: ( 3, 2, 0)
block idx: ( 0, 1, 1), thread idx: 2, cord: ( 2, 3, 0)
block idx: ( 0, 1, 1), thread idx: 3, cord: ( 3, 3, 0)
block idx: ( 0, 1, 1), thread idx: 4, cord: ( 2, 2, 1)
block idx: ( 0, 1, 1), thread idx: 5, cord: ( 3, 2, 1)
block idx: ( 0, 1, 1), thread idx: 6, cord: ( 2, 3, 1)
block idx: ( 0, 1, 1), thread idx: 7, cord: ( 3, 3, 1)
block idx: ( 0, 0, 0), thread idx: 0, cord: ( 0, 0, 0)
block idx: ( 0, 0, 0), thread idx: 1, cord: ( 1, 0, 0)
block idx: ( 0, 0, 0), thread idx: 2, cord: ( 0, 1, 0)
block idx: ( 0, 0, 0), thread idx: 3, cord: ( 1, 1, 0)
block idx: ( 0, 0, 0), thread idx: 4, cord: ( 0, 0, 1)
block idx: ( 0, 0, 0), thread idx: 5, cord: ( 1, 0, 1)
block idx: ( 0, 0, 0), thread idx: 6, cord: ( 0, 1, 1)
block idx: ( 0, 0, 0), thread idx: 7, cord: ( 1, 1, 1)
block idx: ( 1, 1, 0), thread idx: 0, cord: ( 0, 2, 2)
block idx: ( 1, 1, 0), thread idx: 1, cord: ( 1, 2, 2)
block idx: ( 1, 1, 0), thread idx: 2, cord: ( 0, 3, 2)
block idx: ( 1, 1, 0), thread idx: 3, cord: ( 1, 3, 2)
block idx: ( 1, 1, 0), thread idx: 4, cord: ( 0, 2, 3)
block idx: ( 1, 1, 0), thread idx: 5, cord: ( 1, 2, 3)
block idx: ( 1, 1, 0), thread idx: 6, cord: ( 0, 3, 3)
block idx: ( 1, 1, 0), thread idx: 7, cord: ( 1, 3, 3)