internal/graphicsdriver/directx: avoid allocation in adjustUniforms

Closes #3265
This commit is contained in:
Hajime Hoshi
2025-06-28 01:04:05 +09:00
parent 8a946ca221
commit ff26882df5
3 changed files with 36 additions and 35 deletions
@@ -94,6 +94,7 @@ type graphics12 struct {
shaders map[graphicsdriver.ShaderID]*shader12
nextShaderID graphicsdriver.ShaderID
disposedShaders [frameCount][]*shader12
tmpUniforms []uint32
vsyncEnabled bool
@@ -1128,7 +1129,7 @@ func (g *graphics12) DrawTriangles(dstID graphicsdriver.ImageID, srcs [graphics.
}
shader := g.shaders[shaderID]
adjustedUniforms := adjustUniforms(shader.uniformTypes, shader.uniformOffsets, uniforms)
g.tmpUniforms = appendAdjustedUniforms(g.tmpUniforms[:0], shader.uniformTypes, shader.uniformOffsets, uniforms)
w, h := dst.internalSize()
g.needFlushDrawCommandList = true
@@ -1156,7 +1157,7 @@ func (g *graphics12) DrawTriangles(dstID graphicsdriver.ImageID, srcs [graphics.
Format: _DXGI_FORMAT_R32_UINT,
})
if err := g.pipelineStates.drawTriangles(g.device, g.drawCommandList, g.frameIndex, dst.screen, srcImages, shader, dstRegions, adjustedUniforms, blend, indexOffset, fillRule); err != nil {
if err := g.pipelineStates.drawTriangles(g.device, g.drawCommandList, g.frameIndex, dst.screen, srcImages, shader, dstRegions, g.tmpUniforms, blend, indexOffset, fillRule); err != nil {
return err
}
@@ -29,6 +29,7 @@ type shader11 struct {
uniformOffsets []int
vertexShaderBlob *_ID3DBlob
pixelShaderBlob *_ID3DBlob
tmpUniforms []uint32
inputLayout *_ID3D11InputLayout
vertexShader *_ID3D11VertexShader
@@ -105,12 +106,12 @@ func (s *shader11) use(uniforms []uint32, srcs [graphics.ShaderSrcImageCount]*im
s.graphics.deviceContext.PSSetConstantBuffers(0, []*_ID3D11Buffer{cb})
// Send the constant buffer data.
uniforms = adjustUniforms(s.uniformTypes, s.uniformOffsets, uniforms)
s.tmpUniforms = appendAdjustedUniforms(s.tmpUniforms[:0], s.uniformTypes, s.uniformOffsets, uniforms)
var mapped _D3D11_MAPPED_SUBRESOURCE
if err := s.graphics.deviceContext.Map(unsafe.Pointer(cb), 0, _D3D11_MAP_WRITE_DISCARD, 0, &mapped); err != nil {
return err
}
copy(unsafe.Slice((*uint32)(mapped.pData), len(uniforms)), uniforms)
copy(unsafe.Slice((*uint32)(mapped.pData), len(s.tmpUniforms)), s.tmpUniforms)
s.graphics.deviceContext.Unmap(unsafe.Pointer(cb), 0)
// Set the render sources.
@@ -203,40 +203,39 @@ func constantBufferSize(uniformTypes []shaderir.Type, uniformOffsets []int) int
return size
}
func adjustUniforms(uniformTypes []shaderir.Type, uniformOffsets []int, uniforms []uint32) []uint32 {
func appendAdjustedUniforms(dst []uint32, uniformTypes []shaderir.Type, uniformOffsets []int, uniforms []uint32) []uint32 {
// Note that HLSL's matrices are row-major, while GLSL and MSL are column-major.
// Transpose matrices so that users can access matrix indices in the same way as GLSL and MSL.
// For packing rule, see https://github.com/microsoft/DirectXShaderCompiler/wiki/Buffer-Packing
var fs []uint32
var idx int
for i, typ := range uniformTypes {
if len(fs) < uniformOffsets[i] {
fs = append(fs, make([]uint32, uniformOffsets[i]-len(fs))...)
if len(dst) < uniformOffsets[i] {
dst = append(dst, make([]uint32, uniformOffsets[i]-len(dst))...)
}
n := typ.DwordCount()
switch typ.Main {
case shaderir.Bool:
// Bool is 4 bytes in HLSL.
fs = append(fs, uniforms[idx:idx+1]...)
dst = append(dst, uniforms[idx:idx+1]...)
case shaderir.Float:
fs = append(fs, uniforms[idx:idx+1]...)
dst = append(dst, uniforms[idx:idx+1]...)
case shaderir.Int:
fs = append(fs, uniforms[idx:idx+1]...)
dst = append(dst, uniforms[idx:idx+1]...)
case shaderir.Vec2, shaderir.IVec2:
fs = append(fs, uniforms[idx:idx+2]...)
dst = append(dst, uniforms[idx:idx+2]...)
case shaderir.Vec3, shaderir.IVec3:
fs = append(fs, uniforms[idx:idx+3]...)
dst = append(dst, uniforms[idx:idx+3]...)
case shaderir.Vec4, shaderir.IVec4:
fs = append(fs, uniforms[idx:idx+4]...)
dst = append(dst, uniforms[idx:idx+4]...)
case shaderir.Mat2:
fs = append(fs,
dst = append(dst,
uniforms[idx+0], uniforms[idx+2], 0, 0,
uniforms[idx+1], uniforms[idx+3],
)
case shaderir.Mat3:
fs = append(fs,
dst = append(dst,
uniforms[idx+0], uniforms[idx+3], uniforms[idx+6], 0,
uniforms[idx+1], uniforms[idx+4], uniforms[idx+7], 0,
uniforms[idx+2], uniforms[idx+5], uniforms[idx+8],
@@ -246,14 +245,14 @@ func adjustUniforms(uniformTypes []shaderir.Type, uniformOffsets []int, uniforms
// In DirectX, the NDC's Y direction (upward) and the framebuffer's Y direction (downward) don't
// match. Then, the Y direction must be inverted.
// Invert the sign bits as float32 values.
fs = append(fs,
dst = append(dst,
uniforms[idx+0], uniforms[idx+4], uniforms[idx+8], uniforms[idx+12],
uniforms[idx+1]^(1<<31), uniforms[idx+5]^(1<<31), uniforms[idx+9]^(1<<31), uniforms[idx+13]^(1<<31),
uniforms[idx+2], uniforms[idx+6], uniforms[idx+10], uniforms[idx+14],
uniforms[idx+3], uniforms[idx+7], uniforms[idx+11], uniforms[idx+15],
)
} else {
fs = append(fs,
dst = append(dst,
uniforms[idx+0], uniforms[idx+4], uniforms[idx+8], uniforms[idx+12],
uniforms[idx+1], uniforms[idx+5], uniforms[idx+9], uniforms[idx+13],
uniforms[idx+2], uniforms[idx+6], uniforms[idx+10], uniforms[idx+14],
@@ -265,68 +264,68 @@ func adjustUniforms(uniformTypes []shaderir.Type, uniformOffsets []int, uniforms
switch typ.Sub[0].Main {
case shaderir.Bool:
for j := 0; j < typ.Length; j++ {
fs = append(fs, uniforms[idx+j])
dst = append(dst, uniforms[idx+j])
if j < typ.Length-1 {
fs = append(fs, 0, 0, 0)
dst = append(dst, 0, 0, 0)
}
}
case shaderir.Float:
for j := 0; j < typ.Length; j++ {
fs = append(fs, uniforms[idx+j])
dst = append(dst, uniforms[idx+j])
if j < typ.Length-1 {
fs = append(fs, 0, 0, 0)
dst = append(dst, 0, 0, 0)
}
}
case shaderir.Int:
for j := 0; j < typ.Length; j++ {
fs = append(fs, uniforms[idx+j])
dst = append(dst, uniforms[idx+j])
if j < typ.Length-1 {
fs = append(fs, 0, 0, 0)
dst = append(dst, 0, 0, 0)
}
}
case shaderir.Vec2, shaderir.IVec2:
for j := 0; j < typ.Length; j++ {
fs = append(fs, uniforms[idx+2*j:idx+2*(j+1)]...)
dst = append(dst, uniforms[idx+2*j:idx+2*(j+1)]...)
if j < typ.Length-1 {
fs = append(fs, 0, 0)
dst = append(dst, 0, 0)
}
}
case shaderir.Vec3, shaderir.IVec3:
for j := 0; j < typ.Length; j++ {
fs = append(fs, uniforms[idx+3*j:idx+3*(j+1)]...)
dst = append(dst, uniforms[idx+3*j:idx+3*(j+1)]...)
if j < typ.Length-1 {
fs = append(fs, 0)
dst = append(dst, 0)
}
}
case shaderir.Vec4, shaderir.IVec4:
fs = append(fs, uniforms[idx:idx+4*typ.Length]...)
dst = append(dst, uniforms[idx:idx+4*typ.Length]...)
case shaderir.Mat2:
for j := 0; j < typ.Length; j++ {
u := uniforms[idx+4*j : idx+4*(j+1)]
fs = append(fs,
dst = append(dst,
u[0], u[2], 0, 0,
u[1], u[3],
)
if j < typ.Length-1 {
fs = append(fs, 0, 0)
dst = append(dst, 0, 0)
}
}
case shaderir.Mat3:
for j := 0; j < typ.Length; j++ {
u := uniforms[idx+9*j : idx+9*(j+1)]
fs = append(fs,
dst = append(dst,
u[0], u[3], u[6], 0,
u[1], u[4], u[7], 0,
u[2], u[5], u[8],
)
if j < typ.Length-1 {
fs = append(fs, 0)
dst = append(dst, 0)
}
}
case shaderir.Mat4:
for j := 0; j < typ.Length; j++ {
u := uniforms[idx+16*j : idx+16*(j+1)]
fs = append(fs,
dst = append(dst,
u[0], u[4], u[8], u[12],
u[1], u[5], u[9], u[13],
u[2], u[6], u[10], u[14],
@@ -342,5 +341,5 @@ func adjustUniforms(uniformTypes []shaderir.Type, uniformOffsets []int, uniforms
idx += n
}
return fs
return dst
}