Thread与Warp
来源:互联网 发布:嘉艺云管理平台软件 编辑:程序博客网 时间:2024/04/30 12:34
__ballot(int predicate):返回一个32位整数(初始为零);若当前线程所在Warp中第N个线程的predicate值不为零,则将该整数的第N位置位,即返回整个Warp的投票位掩码
__popc(__ballot(int predicate)):返回当前Warp中predicate不为零的线程数目
asm("mov.u32 %0, %laneid;" : "=r"(ret)):将当前线程在所在Warp中的ID(lane ID,取值0~31)写入ret
unsigned int ret;asm("mov.u32 %0, %lanemask_lt;" : "=r"(ret));:将一个32位掩码写入ret,其中lane ID小于当前线程的各位被置位,其余位为零
__popc(ret & __ballot(int predicate)):其中ret为由%lanemask_lt得到的掩码;返回值为所在Warp中lane ID小于当前线程且满足条件的线程数,即当前线程在所有满足条件的线程中的序号(从0开始)
Example:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
// NOTE(review): newer CUDA toolkits appear to treat device_functions.h as an
// internal header; cuda_runtime.h should already provide the intrinsics used
// here. Kept for compatibility with the original build -- confirm and drop.
#include "device_functions.h"
#include <cstdio>
#include <cstdlib>
#include <iostream>
using namespace std;

// Abort with file/line context when a CUDA runtime call fails, so that an
// earlier error is not silently carried into later calls.
#define CUDA_CHECK(call)                                                      \
    do {                                                                      \
        cudaError_t err_ = (call);                                            \
        if (err_ != cudaSuccess) {                                            \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,     \
                    cudaGetErrorString(err_));                                \
            exit(EXIT_FAILURE);                                               \
        }                                                                     \
    } while (0)

// Returns the calling thread's lane index inside its warp, read from the
// PTX special register %laneid ("%%" is the escaped "%" in inline asm).
__device__ __forceinline__ int laneId()
{
    unsigned int ret;
    asm("mov.u32 %0, %%laneid;" : "=r"(ret));
    return ret;
}

// Returns the PTX special register %lanemask_lt: a bit mask with a bit set
// for every lane whose index is lower than the calling thread's lane.
__device__ __forceinline__ int laneMaskLt()
{
    unsigned int ret;
    asm("mov.u32 %0, %%lanemask_lt;" : "=r"(ret));
    return ret;
}

// Demonstrates warp vote / lane-mask intrinsics. For every global thread
// index x < n the kernel stores:
//   a[x] - ballot mask of the lanes in this warp for which (x > 10) holds
//   b[x] - the calling thread's %lanemask_lt value
//   c[x] - number of lanes in the warp that satisfy the predicate
//   d[x] - number of satisfying lanes with a lower lane index than this
//          thread (its 0-based rank among the satisfying lanes)
//   e[x] - the calling thread's lane index
// Expects a 1D launch; each output array must hold at least n ints.
// Uses __ballot_sync, which replaces the legacy mask-less __ballot.
__global__ void testKernel(int *a, int *b, int *c, int *d, int *e, int n)
{
    int x = threadIdx.x + blockIdx.x * blockDim.x;

    // Determine the participating lanes BEFORE any thread exits: the second
    // vote must only name lanes that actually reach it.
    const unsigned int active = __ballot_sync(0xffffffffu, x < n);
    if (x >= n)
    {
        return;
    }

    const unsigned int votes = __ballot_sync(active, x > 10);
    const unsigned int lower = (unsigned int)laneMaskLt();

    a[x] = (int)votes;
    b[x] = (int)lower;
    d[x] = __popc(lower & votes);
    c[x] = __popc(votes);
    e[x] = laneId();
}

// Allocates a host buffer of `bytes` bytes or terminates the program.
static int *allocHostInts(size_t bytes)
{
    int *p = (int *)malloc(bytes);
    if (p == NULL)
    {
        fprintf(stderr, "host allocation of %lu bytes failed\n",
                (unsigned long)bytes);
        exit(EXIT_FAILURE);
    }
    return p;
}

// Launches testKernel on 64 threads in a single block, copies the five
// result arrays back and prints one line "a b c d e" per thread.
int main()
{
    int *a, *b, *c, *d, *e, *dev_a, *dev_b, *dev_c, *dev_d, *dev_e;
    int n = 64;
    int size = n * sizeof(int);

    a = allocHostInts(size);
    b = allocHostInts(size);
    c = allocHostInts(size);
    d = allocHostInts(size);
    e = allocHostInts(size);

    CUDA_CHECK(cudaMalloc(&dev_a, size));
    CUDA_CHECK(cudaMalloc(&dev_b, size));
    CUDA_CHECK(cudaMalloc(&dev_c, size));
    CUDA_CHECK(cudaMalloc(&dev_d, size));
    CUDA_CHECK(cudaMalloc(&dev_e, size));

    testKernel<<<1, n>>>(dev_a, dev_b, dev_c, dev_d, dev_e, n);
    CUDA_CHECK(cudaGetLastError());        // launch-configuration errors
    CUDA_CHECK(cudaDeviceSynchronize());   // errors raised while executing

    CUDA_CHECK(cudaMemcpy(a, dev_a, size, cudaMemcpyDeviceToHost));
    CUDA_CHECK(cudaMemcpy(b, dev_b, size, cudaMemcpyDeviceToHost));
    CUDA_CHECK(cudaMemcpy(c, dev_c, size, cudaMemcpyDeviceToHost));
    CUDA_CHECK(cudaMemcpy(d, dev_d, size, cudaMemcpyDeviceToHost));
    CUDA_CHECK(cudaMemcpy(e, dev_e, size, cudaMemcpyDeviceToHost));

    for (int i = 0; i < n; ++i)
    {
        printf("%d %d %d %d %d\n", a[i], b[i], c[i], d[i], e[i]);
    }

    CUDA_CHECK(cudaFree(dev_a));
    CUDA_CHECK(cudaFree(dev_b));
    CUDA_CHECK(cudaFree(dev_c));
    CUDA_CHECK(cudaFree(dev_d));
    CUDA_CHECK(cudaFree(dev_e));

    free(a);
    free(b);
    free(c);
    free(d);
    free(e);
    return 0;
}
0 0
- Thread与Warp
- CUDA中grid、block、thread、warp与SM、SP的关系
- [原]CUDA中grid、block、thread、warp与SM、SP的关系
- CUDA中grid、block、thread、warp与SM、SP的关系
- CUDA中grid、block、thread、warp与SM、SP的关系
- warP()
- warp
- CUDA学习----sp, sm, thread, block, grid, warp概念
- CUDA2.1-原理之索引与warp
- Struts2.x 集成 Guice 与 warp-persist 大概
- CUDA编程——GPU架构,由sp,sm,thread,block,grid,warp说起
- CUDA编程系列--GPU架构,由sp,sm,thread,block,grid,warp说起
- Cuda 学习教程(五):GPU架构-Sp,sm,thread,block,grid,warp
- cuda warp
- Warp-CTC
- 【并行计算-CUDA开发】CUDA编程——GPU架构,由sp,sm,thread,block,grid,warp说起
- k-thread 与 n-thread
- 长字符串间的换行与单词间的换行-(word-break , word-warp)
- iOS:机型参数、sdk、xcode各版本
- DWR 过滤器验证权限
- centos引导项的重建
- 0710 测试测试测试blog
- 科学家对偏置并不是视而不见
- Thread与Warp
- 代码扫描事项和resin绑定多ip
- Ubuntu 用vsftpd 配置FTP服务器
- python学习之--调用shell脚本
- 我该补一补
- Functional MRI (second edition) -- 4. Basic Principles of MR Image Formation
- map——单词的转换
- QNX Neutrino工具版内核 [procnto-instr]
- An Overview of Oracle Business Intelligence Apps.