diff --git a/internal/graphicsdriver/directx/graphics12_windows.go b/internal/graphicsdriver/directx/graphics12_windows.go index 4f9f78164..370572ecb 100644 --- a/internal/graphicsdriver/directx/graphics12_windows.go +++ b/internal/graphicsdriver/directx/graphics12_windows.go @@ -94,6 +94,7 @@ type graphics12 struct { shaders map[graphicsdriver.ShaderID]*shader12 nextShaderID graphicsdriver.ShaderID disposedShaders [frameCount][]*shader12 + tmpUniforms []uint32 vsyncEnabled bool @@ -1128,7 +1129,7 @@ func (g *graphics12) DrawTriangles(dstID graphicsdriver.ImageID, srcs [graphics. } shader := g.shaders[shaderID] - adjustedUniforms := adjustUniforms(shader.uniformTypes, shader.uniformOffsets, uniforms) + g.tmpUniforms = appendAdjustedUniforms(g.tmpUniforms[:0], shader.uniformTypes, shader.uniformOffsets, uniforms) w, h := dst.internalSize() g.needFlushDrawCommandList = true @@ -1156,7 +1157,7 @@ func (g *graphics12) DrawTriangles(dstID graphicsdriver.ImageID, srcs [graphics. Format: _DXGI_FORMAT_R32_UINT, }) - if err := g.pipelineStates.drawTriangles(g.device, g.drawCommandList, g.frameIndex, dst.screen, srcImages, shader, dstRegions, adjustedUniforms, blend, indexOffset, fillRule); err != nil { + if err := g.pipelineStates.drawTriangles(g.device, g.drawCommandList, g.frameIndex, dst.screen, srcImages, shader, dstRegions, g.tmpUniforms, blend, indexOffset, fillRule); err != nil { return err } diff --git a/internal/graphicsdriver/directx/shader11_windows.go b/internal/graphicsdriver/directx/shader11_windows.go index c56448e6d..f119b8623 100644 --- a/internal/graphicsdriver/directx/shader11_windows.go +++ b/internal/graphicsdriver/directx/shader11_windows.go @@ -29,6 +29,7 @@ type shader11 struct { uniformOffsets []int vertexShaderBlob *_ID3DBlob pixelShaderBlob *_ID3DBlob + tmpUniforms []uint32 inputLayout *_ID3D11InputLayout vertexShader *_ID3D11VertexShader @@ -105,12 +106,12 @@ func (s *shader11) use(uniforms []uint32, srcs [graphics.ShaderSrcImageCount]*im s.graphics.deviceContext.PSSetConstantBuffers(0, []*_ID3D11Buffer{cb}) // Send the constant buffer data. - uniforms = adjustUniforms(s.uniformTypes, s.uniformOffsets, uniforms) + s.tmpUniforms = appendAdjustedUniforms(s.tmpUniforms[:0], s.uniformTypes, s.uniformOffsets, uniforms) var mapped _D3D11_MAPPED_SUBRESOURCE if err := s.graphics.deviceContext.Map(unsafe.Pointer(cb), 0, _D3D11_MAP_WRITE_DISCARD, 0, &mapped); err != nil { return err } - copy(unsafe.Slice((*uint32)(mapped.pData), len(uniforms)), uniforms) + copy(unsafe.Slice((*uint32)(mapped.pData), len(s.tmpUniforms)), s.tmpUniforms) s.graphics.deviceContext.Unmap(unsafe.Pointer(cb), 0) // Set the render sources. diff --git a/internal/graphicsdriver/directx/shader_windows.go b/internal/graphicsdriver/directx/shader_windows.go index 2a02c6ed0..07a60534b 100644 --- a/internal/graphicsdriver/directx/shader_windows.go +++ b/internal/graphicsdriver/directx/shader_windows.go @@ -203,40 +203,39 @@ func constantBufferSize(uniformTypes []shaderir.Type, uniformOffsets []int) int return size } -func adjustUniforms(uniformTypes []shaderir.Type, uniformOffsets []int, uniforms []uint32) []uint32 { +func appendAdjustedUniforms(dst []uint32, uniformTypes []shaderir.Type, uniformOffsets []int, uniforms []uint32) []uint32 { // Note that HLSL's matrices are row-major, while GLSL and MSL are column-major. // Transpose matrices so that users can access matrix indices in the same way as GLSL and MSL. // For packing rule, see https://github.com/microsoft/DirectXShaderCompiler/wiki/Buffer-Packing - var fs []uint32 var idx int for i, typ := range uniformTypes { - if len(fs) < uniformOffsets[i] { - fs = append(fs, make([]uint32, uniformOffsets[i]-len(fs))...) + if len(dst) < uniformOffsets[i] { + dst = append(dst, make([]uint32, uniformOffsets[i]-len(dst))...) } n := typ.DwordCount() switch typ.Main { case shaderir.Bool: // Bool is 4 bytes in HLSL. - fs = append(fs, uniforms[idx:idx+1]...) + dst = append(dst, uniforms[idx:idx+1]...) case shaderir.Float: - fs = append(fs, uniforms[idx:idx+1]...) + dst = append(dst, uniforms[idx:idx+1]...) case shaderir.Int: - fs = append(fs, uniforms[idx:idx+1]...) + dst = append(dst, uniforms[idx:idx+1]...) case shaderir.Vec2, shaderir.IVec2: - fs = append(fs, uniforms[idx:idx+2]...) + dst = append(dst, uniforms[idx:idx+2]...) case shaderir.Vec3, shaderir.IVec3: - fs = append(fs, uniforms[idx:idx+3]...) + dst = append(dst, uniforms[idx:idx+3]...) case shaderir.Vec4, shaderir.IVec4: - fs = append(fs, uniforms[idx:idx+4]...) + dst = append(dst, uniforms[idx:idx+4]...) case shaderir.Mat2: - fs = append(fs, + dst = append(dst, uniforms[idx+0], uniforms[idx+2], 0, 0, uniforms[idx+1], uniforms[idx+3], ) case shaderir.Mat3: - fs = append(fs, + dst = append(dst, uniforms[idx+0], uniforms[idx+3], uniforms[idx+6], 0, uniforms[idx+1], uniforms[idx+4], uniforms[idx+7], 0, uniforms[idx+2], uniforms[idx+5], uniforms[idx+8], @@ -246,14 +245,14 @@ func adjustUniforms(uniformTypes []shaderir.Type, uniformOffsets []int, uniforms // In DirectX, the NDC's Y direction (upward) and the framebuffer's Y direction (downward) don't // match. Then, the Y direction must be inverted. // Invert the sign bits as float32 values. - fs = append(fs, + dst = append(dst, uniforms[idx+0], uniforms[idx+4], uniforms[idx+8], uniforms[idx+12], uniforms[idx+1]^(1<<31), uniforms[idx+5]^(1<<31), uniforms[idx+9]^(1<<31), uniforms[idx+13]^(1<<31), uniforms[idx+2], uniforms[idx+6], uniforms[idx+10], uniforms[idx+14], uniforms[idx+3], uniforms[idx+7], uniforms[idx+11], uniforms[idx+15], ) } else { - fs = append(fs, + dst = append(dst, uniforms[idx+0], uniforms[idx+4], uniforms[idx+8], uniforms[idx+12], uniforms[idx+1], uniforms[idx+5], uniforms[idx+9], uniforms[idx+13], uniforms[idx+2], uniforms[idx+6], uniforms[idx+10], uniforms[idx+14], @@ -265,68 +264,68 @@ func adjustUniforms(uniformTypes []shaderir.Type, uniformOffsets []int, uniforms switch typ.Sub[0].Main { case shaderir.Bool: for j := 0; j < typ.Length; j++ { - fs = append(fs, uniforms[idx+j]) + dst = append(dst, uniforms[idx+j]) if j < typ.Length-1 { - fs = append(fs, 0, 0, 0) + dst = append(dst, 0, 0, 0) } } case shaderir.Float: for j := 0; j < typ.Length; j++ { - fs = append(fs, uniforms[idx+j]) + dst = append(dst, uniforms[idx+j]) if j < typ.Length-1 { - fs = append(fs, 0, 0, 0) + dst = append(dst, 0, 0, 0) } } case shaderir.Int: for j := 0; j < typ.Length; j++ { - fs = append(fs, uniforms[idx+j]) + dst = append(dst, uniforms[idx+j]) if j < typ.Length-1 { - fs = append(fs, 0, 0, 0) + dst = append(dst, 0, 0, 0) } } case shaderir.Vec2, shaderir.IVec2: for j := 0; j < typ.Length; j++ { - fs = append(fs, uniforms[idx+2*j:idx+2*(j+1)]...) + dst = append(dst, uniforms[idx+2*j:idx+2*(j+1)]...) if j < typ.Length-1 { - fs = append(fs, 0, 0) + dst = append(dst, 0, 0) } } case shaderir.Vec3, shaderir.IVec3: for j := 0; j < typ.Length; j++ { - fs = append(fs, uniforms[idx+3*j:idx+3*(j+1)]...) + dst = append(dst, uniforms[idx+3*j:idx+3*(j+1)]...) if j < typ.Length-1 { - fs = append(fs, 0) + dst = append(dst, 0) } } case shaderir.Vec4, shaderir.IVec4: - fs = append(fs, uniforms[idx:idx+4*typ.Length]...) + dst = append(dst, uniforms[idx:idx+4*typ.Length]...) case shaderir.Mat2: for j := 0; j < typ.Length; j++ { u := uniforms[idx+4*j : idx+4*(j+1)] - fs = append(fs, + dst = append(dst, u[0], u[2], 0, 0, u[1], u[3], ) if j < typ.Length-1 { - fs = append(fs, 0, 0) + dst = append(dst, 0, 0) } } case shaderir.Mat3: for j := 0; j < typ.Length; j++ { u := uniforms[idx+9*j : idx+9*(j+1)] - fs = append(fs, + dst = append(dst, u[0], u[3], u[6], 0, u[1], u[4], u[7], 0, u[2], u[5], u[8], ) if j < typ.Length-1 { - fs = append(fs, 0) + dst = append(dst, 0) } } case shaderir.Mat4: for j := 0; j < typ.Length; j++ { u := uniforms[idx+16*j : idx+16*(j+1)] - fs = append(fs, + dst = append(dst, u[0], u[4], u[8], u[12], u[1], u[5], u[9], u[13], u[2], u[6], u[10], u[14], @@ -342,5 +341,5 @@ func adjustUniforms(uniformTypes []shaderir.Type, uniformOffsets []int, uniforms idx += n } - return fs + return dst }