UnityShader——初探Compute Shader

Compute Shader是基于DX11(SM4.5+)的在GPU上运行的程序,通过Compute Shader我们可以将大量可以并行的计算放到GPU中执行,从而节省CPU资源。Unity 5.6版本提供的 Graphics.DrawMeshInstancedIndirect 接口可以非常方便地配合Compute Shader做大规模渲染。

先将一些Compute Shader中不同于普通Shader的概念梳理下:

numthreads 定义了一个线程组内的三维线程结构(X、Y、Z 三个维度上各有多少条线程)。线程可以通过下面几个语义获知自己的位置:


SV_GroupThreadID 表示该线程在所属线程组内的三维坐标
SV_GroupID 表示该线程所属的线程组在整个 Dispatch 中的三维坐标
SV_DispatchThreadID 表示该线程在本次 Dispatch 的所有线程中的三维坐标(等于 SV_GroupID × numthreads + SV_GroupThreadID)
SV_GroupIndex 表示该线程在所属线程组内展开成一维后的索引


如果是计算4X4的矩阵加法,可以定义为4X4X1的numthreads结构,这样线程的二维索引会自动对应输入矩阵的行和列。同样,我们也可以定义16X1X1的结构,但这样每个线程就必须根据自己的一维线程编号自行换算出要处理的矩阵元素(原文是 however it would then have to calculate the current matrix entry based on the current thread number. 例如 row = index / 4,col = index % 4)。

SM4.x(cs_4_x)允许numthreads定义的每个线程组最多768条线程
SM5.0(cs_5_0)允许numthreads定义的每个线程组最多1024条线程


又因为compute shader中线程与具体像素之间没有对应关系,无法像像素着色器那样通过屏幕空间导数自动计算贴图的Mip level,所以必须自己指定Mip level,也就是使用 Texture.SampleLevel 或者 Texture.Load 来采样;几何着色器和顶点着色器同理。



/// <summary>
/// CPU-side description of one rendered instance: four consecutive
/// <c>Vector3</c> fields. The shader-side <c>_myIns</c> struct declares the
/// same fields in the same order, so the two must be kept in sync.
/// </summary>
public struct MyInstance
{
    /// <summary>Instance colour, stored as an RGB triple.</summary>
    public Vector3 color;

    /// <summary>Instance position.</summary>
    public Vector3 position;

    /// <summary>Instance velocity.</summary>
    public Vector3 velocity;

    /// <summary>Per-axis instance scale.</summary>
    public Vector3 scale;
}
// Shader-side view of one instance record. Field order matches the C#
// MyInstance struct (color, position, velocity, scale); keep both in sync.
struct _myIns
{
    float3 color;     // instance colour (RGB)
    float3 position;  // instance position
    float3 velocity;  // instance velocity
    float3 scale;     // per-axis instance scale
};

在C#脚本中初始化ComputeBuffer并赋值到Compute Shader和渲染用的普通Shader中

/// <summary>
/// Creates the indirect-argument buffer and the per-instance buffer, fills the
/// instance buffer with randomised data, and binds the buffers and simulation
/// parameters to the render material and the compute shader.
/// </summary>
/// <remarks>
/// NOTE(review): the buffers allocated here are not released in this method;
/// confirm that a teardown path elsewhere calls Release() on them.
/// </remarks>
void InitBuffer()
{
    // Indirect draw arguments: one element whose stride covers the whole args array.
    int argsStride = args.Length * sizeof(uint);
    argsBuffer = new ComputeBuffer(1, argsStride, ComputeBufferType.IndirectArguments);
    args[0] = meshInstance.GetIndexCount(0); // index count of sub-mesh 0
    args[1] = (uint)num;                     // number of instances to draw
    argsBuffer.SetData(args);

    // One element per instance. The stride is MySize.SizeOfFloat3 * 4
    // (presumably the byte size of the four float3 fields of MyInstance).
    instanceBuffer = new ComputeBuffer(num, MySize.SizeOfFloat3 * 4);
    _instance = new MyInstance[num];
    for (int index = 0; index < num; index++)
    {
        // Initialisers run top to bottom, so the random draws happen in the
        // order colour, position, velocity.
        _instance[index] = new MyInstance
        {
            color = new Vector3(Random.Range(0f, 1f), Random.Range(0f, 1f), Random.Range(0f, 1f)),
            position = Random.insideUnitSphere * Radius,
            velocity = Random.insideUnitSphere,
            scale = Vector3.one,
        };
    }

    instanceBuffer.SetData(_instance);

    // The render material reads the same buffer under the name "positionBuffer".
    matinstance.SetBuffer("positionBuffer", instanceBuffer);

    // Compute shader setup.
    _kernel = insCompute.FindKernel("CSMain");
    if (_kernel == -1)
    {
        Debug.LogError("Failed to find kernel");
        return;
    }

    insCompute.SetBuffer(_kernel, "inss", instanceBuffer);
    insCompute.SetFloat("deltaTime", Time.fixedDeltaTime);
    insCompute.SetFloat("radiu", Radius);
    insCompute.SetTexture(_kernel, "noiseTex", noiseTex);
}


// Simulation step length, set from the C# side.
float deltaTime;
// Radius used to normalise instance positions into noise-texture coordinates.
float radiu;
// Per-instance records, updated in place by CSMain.
RWStructuredBuffer<_myIns> inss;
// 3D noise texture that drives the per-instance scale.
Texture3D<float4> noiseTex;
// NOTE(review): this state block sets no AddressW although the texture is 3D,
// and Unity may take the sampler settings from the texture asset instead of
// this block - confirm which settings actually apply.
SamplerState samplernoiseTex
{
    Filter = MIN_MAG_MIP_LINEAR;
    AddressU = Wrap;
    AddressV = Wrap;
};

// Advances one instance per thread: moves it along its velocity and derives
// its scale from the noise texture sampled at its pre-move position.
[numthreads(BLOCKSIZE, 1, 1)]
void CSMain(uint3 id : SV_DispatchThreadID)
{
    uint i = id.x;

    uint num, stride;
    inss.GetDimensions(num, stride);

    // When the dispatch is rounded up to whole thread groups, trailing threads
    // have no instance. Leave before touching the buffer so they never read
    // or write out of range.
    if (i >= num)
    {
        return;
    }

    float3 position = inss[i].position;
    float3 velocity = inss[i].velocity;

    // Texture coordinates come from the position before it is advanced.
    float3 uv = abs(position) / radiu;

    // Compute shaders have no automatic mip selection, so sample mip 0
    // explicitly; only the xyz channels are used.
    float3 ns = noiseTex.SampleLevel(samplernoiseTex, uv, 0).xyz;

    position += 5 * velocity * deltaTime;

    // Velocity is not modified by this kernel, so it is not written back.
    inss[i].position = position;
    inss[i].scale = ns * ns;
}

普通Shader中通过SV_InstanceID获取GPU Instance索引

// Vertex function for the instanced draw: fetches this instance's record via
// SV_InstanceID, scales and offsets the vertex by it, and outputs the ambient
// and diffuse lighting terms, the instance colour, and shadow data.
v2f vert(appdata_full v, uint instanceID : SV_InstanceID)
{
    // The instance buffer is only read on shader target 4.5 and above;
    // otherwise a zeroed record is used.
#if SHADER_TARGET >= 45
    _myIns data = positionBuffer[instanceID];
#else
    _myIns data = 0;
#endif

    // The instance position is added directly to the scaled vertex and the
    // result goes straight through the view-projection matrix.
    float3 worldPosition = data.position + v.vertex.xyz * data.scale;
    // The mesh normal is used as-is.
    float3 worldNormal = v.normal;

    v2f o;
    o.pos = mul(UNITY_MATRIX_VP, float4(worldPosition, 1.0f));
    o.uv_MainTex = v.texcoord;
    o.ambient = ShadeSH9(float4(worldNormal, 1.0f));
    o.diffuse = saturate(dot(worldNormal, _WorldSpaceLightPos0.xyz)) * _LightColor0.rgb;
    o.color = data.color;
    TRANSFER_SHADOW(o)
    return o;
}


/// <summary>
/// Runs once per rendered frame: steps the instance simulation on the GPU and
/// then issues the indirect instanced draw.
/// </summary>
private void Update()
{
    // This method runs at the variable frame rate, so pass the real elapsed
    // frame time; a fixed step would make motion speed depend on frame rate.
    insCompute.SetFloat("deltaTime", Time.deltaTime);

    // Round up so a partially filled last group still covers every instance.
    var numOfGroups = Mathf.CeilToInt((float)num / GroupSize);
    insCompute.Dispatch(_kernel, numOfGroups, 1, 1);

    // Bounds takes the FULL box size, not the half-extent. Instances are
    // spawned inside a sphere of radius Radius, so the box needs an edge of
    // 2 * Radius to enclose them.
    // NOTE(review): instances keep moving along their velocity and may leave
    // this volume over time; enlarge the bounds if culling artefacts appear.
    Bounds bs = new Bounds(transform.position, Vector3.one * Radius * 2f);
    Graphics.DrawMeshInstancedIndirect(meshInstance, 0, matinstance, bs, argsBuffer);
}

