From b78d867e7ae338cef85883fc9d6916957e4c1a64 Mon Sep 17 00:00:00 2001
From: Hajime Hoshi <hajimehoshi@gmail.com>
Date: Sat, 14 Mar 2026 14:20:15 +0900
Subject: [PATCH] internal/colormshader: extract ColorM shader logic from
 builtinshader

Create a new internal/colormshader package that owns the color matrix
shader generation, fully independent of internal/builtinshader. It has
its own shader template with ColorM logic baked in, its own
Filter/Address types, and a go:generate pipeline producing defs.go.

This removes the useColorM dimension from internal/builtinshader,
reducing it from 18 shader variants (3 filters x 3 addresses x 2) to 9,
and moves the UniformColorMBody/UniformColorMTranslation names into
colormshader. The colorm package now imports only colormshader (not
builtinshader) for shader sources and uniform names, preparing for the
v2/v3 split where v2's colorm cannot access v3's builtinshader.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 colorm/colorm.go                       |  14 +-
 colorm/draw.go                         |   6 +-
 image.go                               |   9 +-
 internal/atlas/shader.go               |   4 +-
 internal/builtinshader/defs.go         |  45 ++-----
 internal/builtinshader/gen.go          |  20 ++-
 internal/builtinshader/shader.go       |  40 +-----
 internal/colormshader/defs.go          |  48 +++++++
 internal/colormshader/gen.go           |  98 ++++++++++++++
 internal/colormshader/shader.go        | 176 +++++++++++++++++++++++++
 internal/graphicscommand/image_test.go |   2 +-
 internal/shaderir/bench_test.go        |   2 +-
 shader.go                              |   8 +-
 13 files changed, 370 insertions(+), 102 deletions(-)
 create mode 100644 internal/colormshader/defs.go
 create mode 100644 internal/colormshader/gen.go
 create mode 100644 internal/colormshader/shader.go

diff --git a/colorm/colorm.go b/colorm/colorm.go
index 29b016387..0d7beeced 100644
--- a/colorm/colorm.go
+++ b/colorm/colorm.go
@@ -21,7 +21,7 @@ import (
 
 	"github.com/hajimehoshi/ebiten/v2"
 	"github.com/hajimehoshi/ebiten/v2/internal/affine"
-	"github.com/hajimehoshi/ebiten/v2/internal/builtinshader"
+	"github.com/hajimehoshi/ebiten/v2/internal/colormshader"
 )
 
 // Dim is the dimension of a ColorM.
@@ -154,14 +154,14 @@ func uniforms(c ColorM) map[string]any {
 	c.affineColorM().Elements(body[:], translation[:])
 
 	uniforms := map[string]any{}
-	uniforms[builtinshader.UniformColorMBody] = body[:]
-	uniforms[builtinshader.UniformColorMTranslation] = translation[:]
+	uniforms[colormshader.UniformColorMBody] = body[:]
+	uniforms[colormshader.UniformColorMTranslation] = translation[:]
 	return uniforms
 }
 
 type builtinShaderKey struct {
-	filter  builtinshader.Filter
-	address builtinshader.Address
+	filter  colormshader.Filter
+	address colormshader.Address
 }
 
 var (
@@ -169,7 +169,7 @@ var (
 	builtinShadersM sync.Mutex
 )
 
-func builtinShader(filter builtinshader.Filter, address builtinshader.Address) *ebiten.Shader {
+func builtinShader(filter colormshader.Filter, address colormshader.Address) *ebiten.Shader {
 	builtinShadersM.Lock()
 	defer builtinShadersM.Unlock()
 
@@ -181,7 +181,7 @@ func builtinShader(filter builtinshader.Filter, address builtinshader.Address) *
 		return s
 	}
 
-	src := builtinshader.ShaderSource(filter, address, true)
+	src := colormshader.ShaderSource(filter, address)
 	s, err := ebiten.NewShader(src)
 	if err != nil {
 		panic(fmt.Sprintf("colorm: NewShader for a built-in shader failed: %v", err))
diff --git a/colorm/draw.go b/colorm/draw.go
index 4dbc1645b..6c7c5b10d 100644
--- a/colorm/draw.go
+++ b/colorm/draw.go
@@ -16,7 +16,7 @@ package colorm
 
 import (
 	"github.com/hajimehoshi/ebiten/v2"
-	"github.com/hajimehoshi/ebiten/v2/internal/builtinshader"
+	"github.com/hajimehoshi/ebiten/v2/internal/colormshader"
 )
 
 // DrawImageOptions represents options for DrawImage.
@@ -58,7 +58,7 @@ func DrawImage(dst, src *ebiten.Image, colorM ColorM, op *DrawImageOptions) {
 	opShader.Blend = op.Blend
 	opShader.Uniforms = uniforms(colorM)
 	opShader.Images[0] = src
-	s := builtinShader(builtinshader.Filter(op.Filter), builtinshader.AddressUnsafe)
+	s := builtinShader(colormshader.Filter(op.Filter), colormshader.AddressUnsafe)
 	dst.DrawRectShader(src.Bounds().Dx(), src.Bounds().Dy(), s, opShader)
 }
 
@@ -129,6 +129,6 @@ func DrawTriangles(dst *ebiten.Image, vertices []ebiten.Vertex, indices []uint16
 	opShader.AntiAlias = op.AntiAlias
 	opShader.Uniforms = uniforms(colorM)
 	opShader.Images[0] = img
-	s := builtinShader(builtinshader.Filter(op.Filter), builtinshader.Address(op.Address))
+	s := builtinShader(colormshader.Filter(op.Filter), colormshader.Address(op.Address))
 	dst.DrawTrianglesShader(vertices, indices, s, opShader)
 }
diff --git a/image.go b/image.go
index d8551ed21..4af976e9d 100644
--- a/image.go
+++ b/image.go
@@ -27,6 +27,7 @@ import (
 	"github.com/hajimehoshi/ebiten/v2/internal/affine"
 	"github.com/hajimehoshi/ebiten/v2/internal/atlas"
 	"github.com/hajimehoshi/ebiten/v2/internal/builtinshader"
+	"github.com/hajimehoshi/ebiten/v2/internal/colormshader"
 	"github.com/hajimehoshi/ebiten/v2/internal/graphics"
 	"github.com/hajimehoshi/ebiten/v2/internal/graphicscommand"
 	"github.com/hajimehoshi/ebiten/v2/internal/graphicsdriver"
@@ -323,8 +324,8 @@ func (i *Image) DrawImage(img *Image, options *DrawImageOptions) {
 		var translation [4]float32
 		colorm.Elements(body[:], translation[:])
 		i.tmpUniforms = shader.appendUniforms(i.tmpUniforms, map[string]any{
-			builtinshader.UniformColorMBody:        body[:],
-			builtinshader.UniformColorMTranslation: translation[:],
+			colormshader.UniformColorMBody:        body[:],
+			colormshader.UniformColorMTranslation: translation[:],
 		})
 	}
 
@@ -685,8 +686,8 @@ func (i *Image) DrawTriangles32(vertices []Vertex, indices []uint32, img *Image,
 		var translation [4]float32
 		colorm.Elements(body[:], translation[:])
 		i.tmpUniforms = shader.appendUniforms(i.tmpUniforms, map[string]any{
-			builtinshader.UniformColorMBody:        body[:],
-			builtinshader.UniformColorMTranslation: translation[:],
+			colormshader.UniformColorMBody:        body[:],
+			colormshader.UniformColorMTranslation: translation[:],
 		})
 	}
 
diff --git a/internal/atlas/shader.go b/internal/atlas/shader.go
index b7bdfd71f..42c1c6a75 100644
--- a/internal/atlas/shader.go
+++ b/internal/atlas/shader.go
@@ -139,7 +139,7 @@ func init() {
 	var wg errgroup.Group
 	var nearestIR, linearIR, clearIR *shaderir.Program
 	wg.Go(func() error {
-		ir, err := graphics.CompileShader([]byte(builtinshader.ShaderSource(builtinshader.FilterNearest, builtinshader.AddressUnsafe, false)))
+		ir, err := graphics.CompileShader([]byte(builtinshader.ShaderSource(builtinshader.FilterNearest, builtinshader.AddressUnsafe)))
 		if err != nil {
 			return fmt.Errorf("atlas: compiling the nearest shader failed: %w", err)
 		}
@@ -147,7 +147,7 @@ func init() {
 		return nil
 	})
 	wg.Go(func() error {
-		ir, err := graphics.CompileShader([]byte(builtinshader.ShaderSource(builtinshader.FilterLinear, builtinshader.AddressUnsafe, false)))
+		ir, err := graphics.CompileShader([]byte(builtinshader.ShaderSource(builtinshader.FilterLinear, builtinshader.AddressUnsafe)))
 		if err != nil {
 			return fmt.Errorf("atlas: compiling the linear shader failed: %w", err)
 		}
diff --git a/internal/builtinshader/defs.go b/internal/builtinshader/defs.go
index ae5b6e8af..39d040fa5 100644
--- a/internal/builtinshader/defs.go
+++ b/internal/builtinshader/defs.go
@@ -21,55 +21,28 @@
 package builtinshader
 
 //ebitengine:shadersource
-const _ = "//kage:unit pixels\n\npackage main\n\n\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tclr := imageSrc0UnsafeAt(srcPos)\n\n\n\n\n\t// Apply the color scale.\n\tclr *= color\n\n\n\treturn clr\n}\n\n"
+const _ = "//kage:unit pixels\n\npackage main\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tclr := imageSrc0UnsafeAt(srcPos)\n\n\n\n\t// Apply the color scale.\n\tclr *= color\n\n\treturn clr\n}\n\n"
 
 //ebitengine:shadersource
-const _ = "//kage:unit pixels\n\npackage main\n\n\nvar ColorMBody mat4\nvar ColorMTranslation vec4\n\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tclr := imageSrc0UnsafeAt(srcPos)\n\n\n\n\n\t// Un-premultiply alpha.\n\t// When the alpha is 0, 1-sign(alpha) is 1.0, which means division does nothing.\n\tclr.rgb /= clr.a + (1-sign(clr.a))\n\t// Apply the clr matrix.\n\tclr = (ColorMBody * clr) + ColorMTranslation\n\t// Premultiply alpha\n\tclr.rgb *= clr.a\n\t// Apply the color scale.\n\tclr *= color\n\t// Clamp the output.\n\tclr.rgb = min(clr.rgb, clr.a)\n\n\n\treturn clr\n}\n\n"
+const _ = "//kage:unit pixels\n\npackage main\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tclr := imageSrc0At(srcPos)\n\n\n\n\t// Apply the color scale.\n\tclr *= color\n\n\treturn clr\n}\n\n"
 
 //ebitengine:shadersource
-const _ = "//kage:unit pixels\n\npackage main\n\n\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tclr := imageSrc0At(srcPos)\n\n\n\n\n\t// Apply the color scale.\n\tclr *= color\n\n\n\treturn clr\n}\n\n"
+const _ = "//kage:unit pixels\n\npackage main\n\n\nfunc adjustSrcPosForAddressRepeat(p vec2) vec2 {\n\torigin := imageSrc0Origin()\n\tsize := imageSrc0Size()\n\treturn mod(p - origin, size) + origin\n}\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tclr := imageSrc0At(adjustSrcPosForAddressRepeat(srcPos))\n\n\n\n\t// Apply the color scale.\n\tclr *= color\n\n\treturn clr\n}\n\n"
 
 //ebitengine:shadersource
-const _ = "//kage:unit pixels\n\npackage main\n\n\nvar ColorMBody mat4\nvar ColorMTranslation vec4\n\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tclr := imageSrc0At(srcPos)\n\n\n\n\n\t// Un-premultiply alpha.\n\t// When the alpha is 0, 1-sign(alpha) is 1.0, which means division does nothing.\n\tclr.rgb /= clr.a + (1-sign(clr.a))\n\t// Apply the clr matrix.\n\tclr = (ColorMBody * clr) + ColorMTranslation\n\t// Premultiply alpha\n\tclr.rgb *= clr.a\n\t// Apply the color scale.\n\tclr *= color\n\t// Clamp the output.\n\tclr.rgb = min(clr.rgb, clr.a)\n\n\n\treturn clr\n}\n\n"
+const _ = "//kage:unit pixels\n\npackage main\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tp0 := srcPos - 1/2.0\n\tp1 := srcPos + 1/2.0\n\n\n\n\n\n\tc0 := imageSrc0UnsafeAt(p0)\n\tc1 := imageSrc0UnsafeAt(vec2(p1.x, p0.y))\n\tc2 := imageSrc0UnsafeAt(vec2(p0.x, p1.y))\n\tc3 := imageSrc0UnsafeAt(p1)\n\n\n\n\trate := fract(p1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\t// Apply the color scale.\n\tclr *= color\n\n\treturn clr\n}\n\n"
 
 //ebitengine:shadersource
-const _ = "//kage:unit pixels\n\npackage main\n\n\n\n\nfunc adjustSrcPosForAddressRepeat(p vec2) vec2 {\n\torigin := imageSrc0Origin()\n\tsize := imageSrc0Size()\n\treturn mod(p - origin, size) + origin\n}\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tclr := imageSrc0At(adjustSrcPosForAddressRepeat(srcPos))\n\n\n\n\n\t// Apply the color scale.\n\tclr *= color\n\n\n\treturn clr\n}\n\n"
+const _ = "//kage:unit pixels\n\npackage main\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tp0 := srcPos - 1/2.0\n\tp1 := srcPos + 1/2.0\n\n\n\n\n\n\tc0 := imageSrc0At(p0)\n\tc1 := imageSrc0At(vec2(p1.x, p0.y))\n\tc2 := imageSrc0At(vec2(p0.x, p1.y))\n\tc3 := imageSrc0At(p1)\n\n\n\n\trate := fract(p1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\t// Apply the color scale.\n\tclr *= color\n\n\treturn clr\n}\n\n"
 
 //ebitengine:shadersource
-const _ = "//kage:unit pixels\n\npackage main\n\n\nvar ColorMBody mat4\nvar ColorMTranslation vec4\n\n\n\nfunc adjustSrcPosForAddressRepeat(p vec2) vec2 {\n\torigin := imageSrc0Origin()\n\tsize := imageSrc0Size()\n\treturn mod(p - origin, size) + origin\n}\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tclr := imageSrc0At(adjustSrcPosForAddressRepeat(srcPos))\n\n\n\n\n\t// Un-premultiply alpha.\n\t// When the alpha is 0, 1-sign(alpha) is 1.0, which means division does nothing.\n\tclr.rgb /= clr.a + (1-sign(clr.a))\n\t// Apply the clr matrix.\n\tclr = (ColorMBody * clr) + ColorMTranslation\n\t// Premultiply alpha\n\tclr.rgb *= clr.a\n\t// Apply the color scale.\n\tclr *= color\n\t// Clamp the output.\n\tclr.rgb = min(clr.rgb, clr.a)\n\n\n\treturn clr\n}\n\n"
+const _ = "//kage:unit pixels\n\npackage main\n\n\nfunc adjustSrcPosForAddressRepeat(p vec2) vec2 {\n\torigin := imageSrc0Origin()\n\tsize := imageSrc0Size()\n\treturn mod(p - origin, size) + origin\n}\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tp0 := srcPos - 1/2.0\n\tp1 := srcPos + 1/2.0\n\n\n\n\tp0 = adjustSrcPosForAddressRepeat(p0)\n\tp1 = adjustSrcPosForAddressRepeat(p1)\n\n\n\n\tc0 := imageSrc0At(p0)\n\tc1 := imageSrc0At(vec2(p1.x, p0.y))\n\tc2 := imageSrc0At(vec2(p0.x, p1.y))\n\tc3 := imageSrc0At(p1)\n\n\n\n\trate := fract(p1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\t// Apply the color scale.\n\tclr *= color\n\n\treturn clr\n}\n\n"
 
 //ebitengine:shadersource
-const _ = "//kage:unit pixels\n\npackage main\n\n\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tp0 := srcPos - 1/2.0\n\tp1 := srcPos + 1/2.0\n\n\n\n\n\n\tc0 := imageSrc0UnsafeAt(p0)\n\tc1 := imageSrc0UnsafeAt(vec2(p1.x, p0.y))\n\tc2 := imageSrc0UnsafeAt(vec2(p0.x, p1.y))\n\tc3 := imageSrc0UnsafeAt(p1)\n\n\n\n\trate := fract(p1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\n\t// Apply the color scale.\n\tclr *= color\n\n\n\treturn clr\n}\n\n"
+const _ = "//kage:unit pixels\n\npackage main\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\t// inversedScale is the size of the region on the source image.\n\t// The size is the inverse of the geometry-matrix scale.\n\tinversedScale := vec2(abs(dfdx(srcPos.x)), abs(dfdy(srcPos.y)))\n\t// Cap the inversedScale to 1 as dfdx/dfdy is not accurate on some machines (#3182).\n\tinversedScale = min(inversedScale, vec2(1))\n\tp0 := srcPos - inversedScale/2.0\n\tp1 := srcPos + inversedScale/2.0\n\n\n\n\n\n\tc0 := imageSrc0UnsafeAt(p0)\n\tc1 := imageSrc0UnsafeAt(vec2(p1.x, p0.y))\n\tc2 := imageSrc0UnsafeAt(vec2(p0.x, p1.y))\n\tc3 := imageSrc0UnsafeAt(p1)\n\n\n\n\trate := clamp(fract(p1)/inversedScale, 0, 1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\t// Apply the color scale.\n\tclr *= color\n\n\treturn clr\n}\n\n"
 
 //ebitengine:shadersource
-const _ = "//kage:unit pixels\n\npackage main\n\n\nvar ColorMBody mat4\nvar ColorMTranslation vec4\n\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tp0 := srcPos - 1/2.0\n\tp1 := srcPos + 1/2.0\n\n\n\n\n\n\tc0 := imageSrc0UnsafeAt(p0)\n\tc1 := imageSrc0UnsafeAt(vec2(p1.x, p0.y))\n\tc2 := imageSrc0UnsafeAt(vec2(p0.x, p1.y))\n\tc3 := imageSrc0UnsafeAt(p1)\n\n\n\n\trate := fract(p1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\n\t// Un-premultiply alpha.\n\t// When the alpha is 0, 1-sign(alpha) is 1.0, which means division does nothing.\n\tclr.rgb /= clr.a + (1-sign(clr.a))\n\t// Apply the clr matrix.\n\tclr = (ColorMBody * clr) + ColorMTranslation\n\t// Premultiply alpha\n\tclr.rgb *= clr.a\n\t// Apply the color scale.\n\tclr *= color\n\t// Clamp the output.\n\tclr.rgb = min(clr.rgb, clr.a)\n\n\n\treturn clr\n}\n\n"
+const _ = "//kage:unit pixels\n\npackage main\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\t// inversedScale is the size of the region on the source image.\n\t// The size is the inverse of the geometry-matrix scale.\n\tinversedScale := vec2(abs(dfdx(srcPos.x)), abs(dfdy(srcPos.y)))\n\t// Cap the inversedScale to 1 as dfdx/dfdy is not accurate on some machines (#3182).\n\tinversedScale = min(inversedScale, vec2(1))\n\tp0 := srcPos - inversedScale/2.0\n\tp1 := srcPos + inversedScale/2.0\n\n\n\n\n\n\tc0 := imageSrc0At(p0)\n\tc1 := imageSrc0At(vec2(p1.x, p0.y))\n\tc2 := imageSrc0At(vec2(p0.x, p1.y))\n\tc3 := imageSrc0At(p1)\n\n\n\n\trate := clamp(fract(p1)/inversedScale, 0, 1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\t// Apply the color scale.\n\tclr *= color\n\n\treturn clr\n}\n\n"
 
 //ebitengine:shadersource
-const _ = "//kage:unit pixels\n\npackage main\n\n\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tp0 := srcPos - 1/2.0\n\tp1 := srcPos + 1/2.0\n\n\n\n\n\n\tc0 := imageSrc0At(p0)\n\tc1 := imageSrc0At(vec2(p1.x, p0.y))\n\tc2 := imageSrc0At(vec2(p0.x, p1.y))\n\tc3 := imageSrc0At(p1)\n\n\n\n\trate := fract(p1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\n\t// Apply the color scale.\n\tclr *= color\n\n\n\treturn clr\n}\n\n"
-
-//ebitengine:shadersource
-const _ = "//kage:unit pixels\n\npackage main\n\n\nvar ColorMBody mat4\nvar ColorMTranslation vec4\n\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tp0 := srcPos - 1/2.0\n\tp1 := srcPos + 1/2.0\n\n\n\n\n\n\tc0 := imageSrc0At(p0)\n\tc1 := imageSrc0At(vec2(p1.x, p0.y))\n\tc2 := imageSrc0At(vec2(p0.x, p1.y))\n\tc3 := imageSrc0At(p1)\n\n\n\n\trate := fract(p1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\n\t// Un-premultiply alpha.\n\t// When the alpha is 0, 1-sign(alpha) is 1.0, which means division does nothing.\n\tclr.rgb /= clr.a + (1-sign(clr.a))\n\t// Apply the clr matrix.\n\tclr = (ColorMBody * clr) + ColorMTranslation\n\t// Premultiply alpha\n\tclr.rgb *= clr.a\n\t// Apply the color scale.\n\tclr *= color\n\t// Clamp the output.\n\tclr.rgb = min(clr.rgb, clr.a)\n\n\n\treturn clr\n}\n\n"
-
-//ebitengine:shadersource
-const _ = "//kage:unit pixels\n\npackage main\n\n\n\n\nfunc adjustSrcPosForAddressRepeat(p vec2) vec2 {\n\torigin := imageSrc0Origin()\n\tsize := imageSrc0Size()\n\treturn mod(p - origin, size) + origin\n}\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tp0 := srcPos - 1/2.0\n\tp1 := srcPos + 1/2.0\n\n\n\n\tp0 = adjustSrcPosForAddressRepeat(p0)\n\tp1 = adjustSrcPosForAddressRepeat(p1)\n\n\n\n\tc0 := imageSrc0At(p0)\n\tc1 := imageSrc0At(vec2(p1.x, p0.y))\n\tc2 := imageSrc0At(vec2(p0.x, p1.y))\n\tc3 := imageSrc0At(p1)\n\n\n\n\trate := fract(p1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\n\t// Apply the color scale.\n\tclr *= color\n\n\n\treturn clr\n}\n\n"
-
-//ebitengine:shadersource
-const _ = "//kage:unit pixels\n\npackage main\n\n\nvar ColorMBody mat4\nvar ColorMTranslation vec4\n\n\n\nfunc adjustSrcPosForAddressRepeat(p vec2) vec2 {\n\torigin := imageSrc0Origin()\n\tsize := imageSrc0Size()\n\treturn mod(p - origin, size) + origin\n}\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tp0 := srcPos - 1/2.0\n\tp1 := srcPos + 1/2.0\n\n\n\n\tp0 = adjustSrcPosForAddressRepeat(p0)\n\tp1 = adjustSrcPosForAddressRepeat(p1)\n\n\n\n\tc0 := imageSrc0At(p0)\n\tc1 := imageSrc0At(vec2(p1.x, p0.y))\n\tc2 := imageSrc0At(vec2(p0.x, p1.y))\n\tc3 := imageSrc0At(p1)\n\n\n\n\trate := fract(p1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\n\t// Un-premultiply alpha.\n\t// When the alpha is 0, 1-sign(alpha) is 1.0, which means division does nothing.\n\tclr.rgb /= clr.a + (1-sign(clr.a))\n\t// Apply the clr matrix.\n\tclr = (ColorMBody * clr) + ColorMTranslation\n\t// Premultiply alpha\n\tclr.rgb *= clr.a\n\t// Apply the color scale.\n\tclr *= color\n\t// Clamp the output.\n\tclr.rgb = min(clr.rgb, clr.a)\n\n\n\treturn clr\n}\n\n"
-
-//ebitengine:shadersource
-const _ = "//kage:unit pixels\n\npackage main\n\n\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\t// inversedScale is the size of the region on the source image.\n\t// The size is the inverse of the geometry-matrix scale.\n\tinversedScale := vec2(abs(dfdx(srcPos.x)), abs(dfdy(srcPos.y)))\n\t// Cap the inversedScale to 1 as dfdx/dfdy is not accurate on some machines (#3182).\n\tinversedScale = min(inversedScale, vec2(1))\n\tp0 := srcPos - inversedScale/2.0\n\tp1 := srcPos + inversedScale/2.0\n\n\n\n\n\n\tc0 := imageSrc0UnsafeAt(p0)\n\tc1 := imageSrc0UnsafeAt(vec2(p1.x, p0.y))\n\tc2 := imageSrc0UnsafeAt(vec2(p0.x, p1.y))\n\tc3 := imageSrc0UnsafeAt(p1)\n\n\n\n\trate := clamp(fract(p1)/inversedScale, 0, 1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\n\t// Apply the color scale.\n\tclr *= color\n\n\n\treturn clr\n}\n\n"
-
-//ebitengine:shadersource
-const _ = "//kage:unit pixels\n\npackage main\n\n\nvar ColorMBody mat4\nvar ColorMTranslation vec4\n\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\t// inversedScale is the size of the region on the source image.\n\t// The size is the inverse of the geometry-matrix scale.\n\tinversedScale := vec2(abs(dfdx(srcPos.x)), abs(dfdy(srcPos.y)))\n\t// Cap the inversedScale to 1 as dfdx/dfdy is not accurate on some machines (#3182).\n\tinversedScale = min(inversedScale, vec2(1))\n\tp0 := srcPos - inversedScale/2.0\n\tp1 := srcPos + inversedScale/2.0\n\n\n\n\n\n\tc0 := imageSrc0UnsafeAt(p0)\n\tc1 := imageSrc0UnsafeAt(vec2(p1.x, p0.y))\n\tc2 := imageSrc0UnsafeAt(vec2(p0.x, p1.y))\n\tc3 := imageSrc0UnsafeAt(p1)\n\n\n\n\trate := clamp(fract(p1)/inversedScale, 0, 1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\n\t// Un-premultiply alpha.\n\t// When the alpha is 0, 1-sign(alpha) is 1.0, which means division does nothing.\n\tclr.rgb /= clr.a + (1-sign(clr.a))\n\t// Apply the clr matrix.\n\tclr = (ColorMBody * clr) + ColorMTranslation\n\t// Premultiply alpha\n\tclr.rgb *= clr.a\n\t// Apply the color scale.\n\tclr *= color\n\t// Clamp the output.\n\tclr.rgb = min(clr.rgb, clr.a)\n\n\n\treturn clr\n}\n\n"
-
-//ebitengine:shadersource
-const _ = "//kage:unit pixels\n\npackage main\n\n\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\t// inversedScale is the size of the region on the source image.\n\t// The size is the inverse of the geometry-matrix scale.\n\tinversedScale := vec2(abs(dfdx(srcPos.x)), abs(dfdy(srcPos.y)))\n\t// Cap the inversedScale to 1 as dfdx/dfdy is not accurate on some machines (#3182).\n\tinversedScale = min(inversedScale, vec2(1))\n\tp0 := srcPos - inversedScale/2.0\n\tp1 := srcPos + inversedScale/2.0\n\n\n\n\n\n\tc0 := imageSrc0At(p0)\n\tc1 := imageSrc0At(vec2(p1.x, p0.y))\n\tc2 := imageSrc0At(vec2(p0.x, p1.y))\n\tc3 := imageSrc0At(p1)\n\n\n\n\trate := clamp(fract(p1)/inversedScale, 0, 1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\n\t// Apply the color scale.\n\tclr *= color\n\n\n\treturn clr\n}\n\n"
-
-//ebitengine:shadersource
-const _ = "//kage:unit pixels\n\npackage main\n\n\nvar ColorMBody mat4\nvar ColorMTranslation vec4\n\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\t// inversedScale is the size of the region on the source image.\n\t// The size is the inverse of the geometry-matrix scale.\n\tinversedScale := vec2(abs(dfdx(srcPos.x)), abs(dfdy(srcPos.y)))\n\t// Cap the inversedScale to 1 as dfdx/dfdy is not accurate on some machines (#3182).\n\tinversedScale = min(inversedScale, vec2(1))\n\tp0 := srcPos - inversedScale/2.0\n\tp1 := srcPos + inversedScale/2.0\n\n\n\n\n\n\tc0 := imageSrc0At(p0)\n\tc1 := imageSrc0At(vec2(p1.x, p0.y))\n\tc2 := imageSrc0At(vec2(p0.x, p1.y))\n\tc3 := imageSrc0At(p1)\n\n\n\n\trate := clamp(fract(p1)/inversedScale, 0, 1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\n\t// Un-premultiply alpha.\n\t// When the alpha is 0, 1-sign(alpha) is 1.0, which means division does nothing.\n\tclr.rgb /= clr.a + (1-sign(clr.a))\n\t// Apply the clr matrix.\n\tclr = (ColorMBody * clr) + ColorMTranslation\n\t// Premultiply alpha\n\tclr.rgb *= clr.a\n\t// Apply the color scale.\n\tclr *= color\n\t// Clamp the output.\n\tclr.rgb = min(clr.rgb, clr.a)\n\n\n\treturn clr\n}\n\n"
-
-//ebitengine:shadersource
-const _ = "//kage:unit pixels\n\npackage main\n\n\n\n\nfunc adjustSrcPosForAddressRepeat(p vec2) vec2 {\n\torigin := imageSrc0Origin()\n\tsize := imageSrc0Size()\n\treturn mod(p - origin, size) + origin\n}\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\t// inversedScale is the size of the region on the source image.\n\t// The size is the inverse of the geometry-matrix scale.\n\tinversedScale := vec2(abs(dfdx(srcPos.x)), abs(dfdy(srcPos.y)))\n\t// Cap the inversedScale to 1 as dfdx/dfdy is not accurate on some machines (#3182).\n\tinversedScale = min(inversedScale, vec2(1))\n\tp0 := srcPos - inversedScale/2.0\n\tp1 := srcPos + inversedScale/2.0\n\n\n\n\tp0 = adjustSrcPosForAddressRepeat(p0)\n\tp1 = adjustSrcPosForAddressRepeat(p1)\n\n\n\n\tc0 := imageSrc0At(p0)\n\tc1 := imageSrc0At(vec2(p1.x, p0.y))\n\tc2 := imageSrc0At(vec2(p0.x, p1.y))\n\tc3 := imageSrc0At(p1)\n\n\n\n\trate := clamp(fract(p1)/inversedScale, 0, 1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\n\t// Apply the color scale.\n\tclr *= color\n\n\n\treturn clr\n}\n\n"
-
-//ebitengine:shadersource
-const _ = "//kage:unit pixels\n\npackage main\n\n\nvar ColorMBody mat4\nvar ColorMTranslation vec4\n\n\n\nfunc adjustSrcPosForAddressRepeat(p vec2) vec2 {\n\torigin := imageSrc0Origin()\n\tsize := imageSrc0Size()\n\treturn mod(p - origin, size) + origin\n}\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\t// inversedScale is the size of the region on the source image.\n\t// The size is the inverse of the geometry-matrix scale.\n\tinversedScale := vec2(abs(dfdx(srcPos.x)), abs(dfdy(srcPos.y)))\n\t// Cap the inversedScale to 1 as dfdx/dfdy is not accurate on some machines (#3182).\n\tinversedScale = min(inversedScale, vec2(1))\n\tp0 := srcPos - inversedScale/2.0\n\tp1 := srcPos + inversedScale/2.0\n\n\n\n\tp0 = adjustSrcPosForAddressRepeat(p0)\n\tp1 = adjustSrcPosForAddressRepeat(p1)\n\n\n\n\tc0 := imageSrc0At(p0)\n\tc1 := imageSrc0At(vec2(p1.x, p0.y))\n\tc2 := imageSrc0At(vec2(p0.x, p1.y))\n\tc3 := imageSrc0At(p1)\n\n\n\n\trate := clamp(fract(p1)/inversedScale, 0, 1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\n\t// Un-premultiply alpha.\n\t// When the alpha is 0, 1-sign(alpha) is 1.0, which means division does nothing.\n\tclr.rgb /= clr.a + (1-sign(clr.a))\n\t// Apply the clr matrix.\n\tclr = (ColorMBody * clr) + ColorMTranslation\n\t// Premultiply alpha\n\tclr.rgb *= clr.a\n\t// Apply the color scale.\n\tclr *= color\n\t// Clamp the output.\n\tclr.rgb = min(clr.rgb, clr.a)\n\n\n\treturn clr\n}\n\n"
+const _ = "//kage:unit pixels\n\npackage main\n\n\nfunc adjustSrcPosForAddressRepeat(p vec2) vec2 {\n\torigin := imageSrc0Origin()\n\tsize := imageSrc0Size()\n\treturn mod(p - origin, size) + origin\n}\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\t// inversedScale is the size of the region on the source image.\n\t// The size is the inverse of the geometry-matrix scale.\n\tinversedScale := vec2(abs(dfdx(srcPos.x)), abs(dfdy(srcPos.y)))\n\t// Cap the inversedScale to 1 as dfdx/dfdy is not accurate on some machines (#3182).\n\tinversedScale = min(inversedScale, vec2(1))\n\tp0 := srcPos - inversedScale/2.0\n\tp1 := srcPos + inversedScale/2.0\n\n\n\n\tp0 = adjustSrcPosForAddressRepeat(p0)\n\tp1 = adjustSrcPosForAddressRepeat(p1)\n\n\n\n\tc0 := imageSrc0At(p0)\n\tc1 := imageSrc0At(vec2(p1.x, p0.y))\n\tc2 := imageSrc0At(vec2(p0.x, p1.y))\n\tc3 := imageSrc0At(p1)\n\n\n\n\trate := clamp(fract(p1)/inversedScale, 0, 1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\t// Apply the color scale.\n\tclr *= color\n\n\treturn clr\n}\n\n"
diff --git a/internal/builtinshader/gen.go b/internal/builtinshader/gen.go
index 8ff7ea90a..f47a402c3 100644
--- a/internal/builtinshader/gen.go
+++ b/internal/builtinshader/gen.go
@@ -78,17 +78,15 @@ func xmain() error {
 
 	for filter := builtinshader.Filter(0); filter < builtinshader.FilterCount; filter++ {
 		for address := builtinshader.Address(0); address < builtinshader.AddressCount; address++ {
-			for _, useColorM := range []bool{false, true} {
-				s := builtinshader.ShaderSource(filter, address, useColorM)
-				if _, err := w.WriteString("\n"); err != nil {
-					return err
-				}
-				if _, err := w.WriteString("//ebitengine:shadersource\n"); err != nil {
-					return err
-				}
-				if _, err := fmt.Fprintf(w, "const _ = %q\n", s); err != nil {
-					return err
-				}
+			s := builtinshader.ShaderSource(filter, address)
+			if _, err := w.WriteString("\n"); err != nil {
+				return err
+			}
+			if _, err := w.WriteString("//ebitengine:shadersource\n"); err != nil {
+				return err
+			}
+			if _, err := fmt.Fprintf(w, "const _ = %q\n", s); err != nil {
+				return err
 			}
 		}
 	}
diff --git a/internal/builtinshader/shader.go b/internal/builtinshader/shader.go
index 65242e945..8cbc7cdea 100644
--- a/internal/builtinshader/shader.go
+++ b/internal/builtinshader/shader.go
@@ -44,13 +44,8 @@ const (
 
 const AddressCount = 3
 
-const (
-	UniformColorMBody        = "ColorMBody"
-	UniformColorMTranslation = "ColorMTranslation"
-)
-
 var (
-	shaders  [FilterCount][AddressCount][2][]byte
+	shaders  [FilterCount][AddressCount][]byte
 	shadersM sync.Mutex
 )
 
@@ -58,11 +53,6 @@ var tmpl = template.Must(template.New("tmpl").Parse(`//kage:unit pixels
 
 package main
 
-{{if .UseColorM}}
-var ColorMBody mat4
-var ColorMTranslation vec4
-{{end}}
-
 {{if eq .Address .AddressRepeat}}
 func adjustSrcPosForAddressRepeat(p vec2) vec2 {
 	origin := imageSrc0Origin()
@@ -119,22 +109,8 @@ func Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {
 	clr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)
 {{end}}
 
-{{if .UseColorM}}
-	// Un-premultiply alpha.
-	// When the alpha is 0, 1-sign(alpha) is 1.0, which means division does nothing.
-	clr.rgb /= clr.a + (1-sign(clr.a))
-	// Apply the clr matrix.
-	clr = (ColorMBody * clr) + ColorMTranslation
-	// Premultiply alpha
-	clr.rgb *= clr.a
 	// Apply the color scale.
 	clr *= color
-	// Clamp the output.
-	clr.rgb = min(clr.rgb, clr.a)
-{{else}}
-	// Apply the color scale.
-	clr *= color
-{{end}}
 
 	return clr
 }
@@ -142,17 +118,11 @@ func Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {
 `))
 
 // ShaderSource returns the built-in shader source based on the given parameters.
-//
-// The returned shader always uses a color matrix so far.
-func ShaderSource(filter Filter, address Address, useColorM bool) []byte {
+func ShaderSource(filter Filter, address Address) []byte {
 	shadersM.Lock()
 	defer shadersM.Unlock()
 
-	var c int
-	if useColorM {
-		c = 1
-	}
-	if s := shaders[filter][address][c]; s != nil {
+	if s := shaders[filter][address]; s != nil {
 		return s
 	}
 
@@ -166,7 +136,6 @@ func ShaderSource(filter Filter, address Address, useColorM bool) []byte {
 		AddressUnsafe      Address
 		AddressClampToZero Address
 		AddressRepeat      Address
-		UseColorM          bool
 	}{
 		Filter:             filter,
 		FilterNearest:      FilterNearest,
@@ -176,13 +145,12 @@ func ShaderSource(filter Filter, address Address, useColorM bool) []byte {
 		AddressUnsafe:      AddressUnsafe,
 		AddressClampToZero: AddressClampToZero,
 		AddressRepeat:      AddressRepeat,
-		UseColorM:          useColorM,
 	}); err != nil {
 		panic(fmt.Sprintf("builtinshader: tmpl.Execute failed: %v", err))
 	}
 
 	b := buf.Bytes()
-	shaders[filter][address][c] = b
+	shaders[filter][address] = b
 	return b
 }
 
diff --git a/internal/colormshader/defs.go b/internal/colormshader/defs.go
new file mode 100644
index 000000000..c22696430
--- /dev/null
+++ b/internal/colormshader/defs.go
@@ -0,0 +1,48 @@
+// Code generated by gen.go using 'go generate'. DO NOT EDIT.
+
+// Copyright 2026 The Ebitengine Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file is intended for precompiled shaders that will be introduced in the future.
+// All constant names are underscores and not actually used,
+// so they do not affect the binary file size.
+
+package colormshader
+
+//ebitengine:shadersource
+const _ = "//kage:unit pixels\n\npackage main\n\nvar ColorMBody mat4\nvar ColorMTranslation vec4\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tclr := imageSrc0UnsafeAt(srcPos)\n\n\n\n\t// Un-premultiply alpha.\n\t// When the alpha is 0, 1-sign(alpha) is 1.0, which means division does nothing.\n\tclr.rgb /= clr.a + (1-sign(clr.a))\n\t// Apply the clr matrix.\n\tclr = (ColorMBody * clr) + ColorMTranslation\n\t// Premultiply alpha\n\tclr.rgb *= clr.a\n\t// Apply the color scale.\n\tclr *= color\n\t// Clamp the output.\n\tclr.rgb = min(clr.rgb, clr.a)\n\n\treturn clr\n}\n\n"
+
+//ebitengine:shadersource
+const _ = "//kage:unit pixels\n\npackage main\n\nvar ColorMBody mat4\nvar ColorMTranslation vec4\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tclr := imageSrc0At(srcPos)\n\n\n\n\t// Un-premultiply alpha.\n\t// When the alpha is 0, 1-sign(alpha) is 1.0, which means division does nothing.\n\tclr.rgb /= clr.a + (1-sign(clr.a))\n\t// Apply the clr matrix.\n\tclr = (ColorMBody * clr) + ColorMTranslation\n\t// Premultiply alpha\n\tclr.rgb *= clr.a\n\t// Apply the color scale.\n\tclr *= color\n\t// Clamp the output.\n\tclr.rgb = min(clr.rgb, clr.a)\n\n\treturn clr\n}\n\n"
+
+//ebitengine:shadersource
+const _ = "//kage:unit pixels\n\npackage main\n\nvar ColorMBody mat4\nvar ColorMTranslation vec4\n\n\nfunc adjustSrcPosForAddressRepeat(p vec2) vec2 {\n\torigin := imageSrc0Origin()\n\tsize := imageSrc0Size()\n\treturn mod(p - origin, size) + origin\n}\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tclr := imageSrc0At(adjustSrcPosForAddressRepeat(srcPos))\n\n\n\n\t// Un-premultiply alpha.\n\t// When the alpha is 0, 1-sign(alpha) is 1.0, which means division does nothing.\n\tclr.rgb /= clr.a + (1-sign(clr.a))\n\t// Apply the clr matrix.\n\tclr = (ColorMBody * clr) + ColorMTranslation\n\t// Premultiply alpha\n\tclr.rgb *= clr.a\n\t// Apply the color scale.\n\tclr *= color\n\t// Clamp the output.\n\tclr.rgb = min(clr.rgb, clr.a)\n\n\treturn clr\n}\n\n"
+
+//ebitengine:shadersource
+const _ = "//kage:unit pixels\n\npackage main\n\nvar ColorMBody mat4\nvar ColorMTranslation vec4\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tp0 := srcPos - 1/2.0\n\tp1 := srcPos + 1/2.0\n\n\n\n\n\n\tc0 := imageSrc0UnsafeAt(p0)\n\tc1 := imageSrc0UnsafeAt(vec2(p1.x, p0.y))\n\tc2 := imageSrc0UnsafeAt(vec2(p0.x, p1.y))\n\tc3 := imageSrc0UnsafeAt(p1)\n\n\n\n\trate := fract(p1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\t// Un-premultiply alpha.\n\t// When the alpha is 0, 1-sign(alpha) is 1.0, which means division does nothing.\n\tclr.rgb /= clr.a + (1-sign(clr.a))\n\t// Apply the clr matrix.\n\tclr = (ColorMBody * clr) + ColorMTranslation\n\t// Premultiply alpha\n\tclr.rgb *= clr.a\n\t// Apply the color scale.\n\tclr *= color\n\t// Clamp the output.\n\tclr.rgb = min(clr.rgb, clr.a)\n\n\treturn clr\n}\n\n"
+
+//ebitengine:shadersource
+const _ = "//kage:unit pixels\n\npackage main\n\nvar ColorMBody mat4\nvar ColorMTranslation vec4\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tp0 := srcPos - 1/2.0\n\tp1 := srcPos + 1/2.0\n\n\n\n\n\n\tc0 := imageSrc0At(p0)\n\tc1 := imageSrc0At(vec2(p1.x, p0.y))\n\tc2 := imageSrc0At(vec2(p0.x, p1.y))\n\tc3 := imageSrc0At(p1)\n\n\n\n\trate := fract(p1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\t// Un-premultiply alpha.\n\t// When the alpha is 0, 1-sign(alpha) is 1.0, which means division does nothing.\n\tclr.rgb /= clr.a + (1-sign(clr.a))\n\t// Apply the clr matrix.\n\tclr = (ColorMBody * clr) + ColorMTranslation\n\t// Premultiply alpha\n\tclr.rgb *= clr.a\n\t// Apply the color scale.\n\tclr *= color\n\t// Clamp the output.\n\tclr.rgb = min(clr.rgb, clr.a)\n\n\treturn clr\n}\n\n"
+
+//ebitengine:shadersource
+const _ = "//kage:unit pixels\n\npackage main\n\nvar ColorMBody mat4\nvar ColorMTranslation vec4\n\n\nfunc adjustSrcPosForAddressRepeat(p vec2) vec2 {\n\torigin := imageSrc0Origin()\n\tsize := imageSrc0Size()\n\treturn mod(p - origin, size) + origin\n}\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\tp0 := srcPos - 1/2.0\n\tp1 := srcPos + 1/2.0\n\n\n\n\tp0 = adjustSrcPosForAddressRepeat(p0)\n\tp1 = adjustSrcPosForAddressRepeat(p1)\n\n\n\n\tc0 := imageSrc0At(p0)\n\tc1 := imageSrc0At(vec2(p1.x, p0.y))\n\tc2 := imageSrc0At(vec2(p0.x, p1.y))\n\tc3 := imageSrc0At(p1)\n\n\n\n\trate := fract(p1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\t// Un-premultiply alpha.\n\t// When the alpha is 0, 1-sign(alpha) is 1.0, which means division does nothing.\n\tclr.rgb /= clr.a + (1-sign(clr.a))\n\t// Apply the clr matrix.\n\tclr = (ColorMBody * clr) + ColorMTranslation\n\t// Premultiply alpha\n\tclr.rgb *= clr.a\n\t// Apply the color scale.\n\tclr *= color\n\t// Clamp the output.\n\tclr.rgb = min(clr.rgb, clr.a)\n\n\treturn clr\n}\n\n"
+
+//ebitengine:shadersource
+const _ = "//kage:unit pixels\n\npackage main\n\nvar ColorMBody mat4\nvar ColorMTranslation vec4\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\t// inversedScale is the size of the region on the source image.\n\t// The size is the inverse of the geometry-matrix scale.\n\tinversedScale := vec2(abs(dfdx(srcPos.x)), abs(dfdy(srcPos.y)))\n\t// Cap the inversedScale to 1 as dfdx/dfdy is not accurate on some machines (#3182).\n\tinversedScale = min(inversedScale, vec2(1))\n\tp0 := srcPos - inversedScale/2.0\n\tp1 := srcPos + inversedScale/2.0\n\n\n\n\n\n\tc0 := imageSrc0UnsafeAt(p0)\n\tc1 := imageSrc0UnsafeAt(vec2(p1.x, p0.y))\n\tc2 := imageSrc0UnsafeAt(vec2(p0.x, p1.y))\n\tc3 := imageSrc0UnsafeAt(p1)\n\n\n\n\trate := clamp(fract(p1)/inversedScale, 0, 1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\t// Un-premultiply alpha.\n\t// When the alpha is 0, 1-sign(alpha) is 1.0, which means division does nothing.\n\tclr.rgb /= clr.a + (1-sign(clr.a))\n\t// Apply the clr matrix.\n\tclr = (ColorMBody * clr) + ColorMTranslation\n\t// Premultiply alpha\n\tclr.rgb *= clr.a\n\t// Apply the color scale.\n\tclr *= color\n\t// Clamp the output.\n\tclr.rgb = min(clr.rgb, clr.a)\n\n\treturn clr\n}\n\n"
+
+//ebitengine:shadersource
+const _ = "//kage:unit pixels\n\npackage main\n\nvar ColorMBody mat4\nvar ColorMTranslation vec4\n\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\t// inversedScale is the size of the region on the source image.\n\t// The size is the inverse of the geometry-matrix scale.\n\tinversedScale := vec2(abs(dfdx(srcPos.x)), abs(dfdy(srcPos.y)))\n\t// Cap the inversedScale to 1 as dfdx/dfdy is not accurate on some machines (#3182).\n\tinversedScale = min(inversedScale, vec2(1))\n\tp0 := srcPos - inversedScale/2.0\n\tp1 := srcPos + inversedScale/2.0\n\n\n\n\n\n\tc0 := imageSrc0At(p0)\n\tc1 := imageSrc0At(vec2(p1.x, p0.y))\n\tc2 := imageSrc0At(vec2(p0.x, p1.y))\n\tc3 := imageSrc0At(p1)\n\n\n\n\trate := clamp(fract(p1)/inversedScale, 0, 1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\t// Un-premultiply alpha.\n\t// When the alpha is 0, 1-sign(alpha) is 1.0, which means division does nothing.\n\tclr.rgb /= clr.a + (1-sign(clr.a))\n\t// Apply the clr matrix.\n\tclr = (ColorMBody * clr) + ColorMTranslation\n\t// Premultiply alpha\n\tclr.rgb *= clr.a\n\t// Apply the color scale.\n\tclr *= color\n\t// Clamp the output.\n\tclr.rgb = min(clr.rgb, clr.a)\n\n\treturn clr\n}\n\n"
+
+//ebitengine:shadersource
+const _ = "//kage:unit pixels\n\npackage main\n\nvar ColorMBody mat4\nvar ColorMTranslation vec4\n\n\nfunc adjustSrcPosForAddressRepeat(p vec2) vec2 {\n\torigin := imageSrc0Origin()\n\tsize := imageSrc0Size()\n\treturn mod(p - origin, size) + origin\n}\n\n\nfunc Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {\n\n\n\t// inversedScale is the size of the region on the source image.\n\t// The size is the inverse of the geometry-matrix scale.\n\tinversedScale := vec2(abs(dfdx(srcPos.x)), abs(dfdy(srcPos.y)))\n\t// Cap the inversedScale to 1 as dfdx/dfdy is not accurate on some machines (#3182).\n\tinversedScale = min(inversedScale, vec2(1))\n\tp0 := srcPos - inversedScale/2.0\n\tp1 := srcPos + inversedScale/2.0\n\n\n\n\tp0 = adjustSrcPosForAddressRepeat(p0)\n\tp1 = adjustSrcPosForAddressRepeat(p1)\n\n\n\n\tc0 := imageSrc0At(p0)\n\tc1 := imageSrc0At(vec2(p1.x, p0.y))\n\tc2 := imageSrc0At(vec2(p0.x, p1.y))\n\tc3 := imageSrc0At(p1)\n\n\n\n\trate := clamp(fract(p1)/inversedScale, 0, 1)\n\n\tclr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)\n\n\n\t// Un-premultiply alpha.\n\t// When the alpha is 0, 1-sign(alpha) is 1.0, which means division does nothing.\n\tclr.rgb /= clr.a + (1-sign(clr.a))\n\t// Apply the clr matrix.\n\tclr = (ColorMBody * clr) + ColorMTranslation\n\t// Premultiply alpha\n\tclr.rgb *= clr.a\n\t// Apply the color scale.\n\tclr *= color\n\t// Clamp the output.\n\tclr.rgb = min(clr.rgb, clr.a)\n\n\treturn clr\n}\n\n"
diff --git a/internal/colormshader/gen.go b/internal/colormshader/gen.go
new file mode 100644
index 000000000..984535033
--- /dev/null
+++ b/internal/colormshader/gen.go
@@ -0,0 +1,98 @@
+// Copyright 2026 The Ebitengine Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build ignore
+
+package main
+
+import (
+	"bufio"
+	"fmt"
+	"os"
+
+	"github.com/hajimehoshi/ebiten/v2/internal/colormshader"
+)
+
+func main() {
+	if err := xmain(); err != nil { // generation failed: report the error on stderr and exit with status 1
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+}
+
+const license = `// Copyright 2026 The Ebitengine Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+` // license is the header text written to defs.go right after the "Code generated" marker.
+
+const note = `// This file is intended for precompiled shaders that will be introduced in the future.
+// All constant names are underscores and not actually used,
+// so they do not affect the binary file size.
+` // note is the explanatory comment written to defs.go between the license and the package clause.
+
+// xmain writes defs.go in the current directory: the generated-code marker,
+// the license, the note, the package clause and then one
+// "//ebitengine:shadersource" constant per filter/address pair. It returns the
+// first write error, or the Close error when everything before it succeeded.
+func xmain() (err error) {
+	f, err := os.Create("defs.go")
+	if err != nil {
+		return err
+	}
+	defer func() {
+		// A failed Close means defs.go may be incomplete, so do not drop it.
+		if cerr := f.Close(); cerr != nil && err == nil {
+			err = cerr
+		}
+	}()
+
+	w := bufio.NewWriter(f)
+
+	// Fixed preamble, in the order it appears in the generated file.
+	preamble := []string{
+		"// Code generated by gen.go using 'go generate'. DO NOT EDIT.\n\n",
+		license,
+		"\n",
+		note,
+		"\npackage colormshader\n",
+	}
+	for _, s := range preamble {
+		if _, err = w.WriteString(s); err != nil {
+			return err
+		}
+	}
+
+	// One constant per (filter, address) combination, filters in the outer loop.
+	for filter := colormshader.Filter(0); filter < colormshader.FilterCount; filter++ {
+		for address := colormshader.Address(0); address < colormshader.AddressCount; address++ {
+			s := colormshader.ShaderSource(filter, address)
+			if _, err = fmt.Fprintf(w, "\n//ebitengine:shadersource\nconst _ = %q\n", s); err != nil {
+				return err
+			}
+		}
+	}
+
+	// Flush before the deferred Close runs so buffered data reaches the file.
+	return w.Flush()
+}
diff --git a/internal/colormshader/shader.go b/internal/colormshader/shader.go
new file mode 100644
index 000000000..1b39e4a65
--- /dev/null
+++ b/internal/colormshader/shader.go
@@ -0,0 +1,176 @@
+// Copyright 2026 The Ebitengine Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:generate go run gen.go
+//go:generate gofmt -s -w .
+
+package colormshader
+
+import (
+	"bytes"
+	"fmt"
+	"sync"
+	"text/template"
+)
+
+// Filter selects the sampling code the template emits. Filter must have the same values as builtinshader.Filter.
+// In v3, colormshader can be moved into the colorm package, making syncing easier.
+type Filter int
+
+const (
+	FilterNearest Filter = iota
+	FilterLinear
+	FilterPixelated
+)
+
+const FilterCount = 3 // FilterCount is the number of Filter values; it sizes the first dimension of the shaders cache.
+
+// Address selects which sampling call the template emits. Address must have the same values as builtinshader.Address.
+// In v3, colormshader can be moved into the colorm package, making syncing easier.
+type Address int
+
+const (
+	AddressUnsafe Address = iota
+	AddressClampToZero
+	AddressRepeat
+)
+
+const AddressCount = 3 // AddressCount is the number of Address values; it sizes the second dimension of the shaders cache.
+
+const (
+	UniformColorMBody        = "ColorMBody"        // name of the mat4 uniform declared in the shader template
+	UniformColorMTranslation = "ColorMTranslation" // name of the vec4 uniform declared in the shader template
+)
+
+var (
+	shaders  [FilterCount][AddressCount][]byte // rendered sources indexed by [filter][address]; an entry is nil until first requested
+	shadersM sync.Mutex                        // held by ShaderSource while it reads or fills shaders
+)
+
+var tmpl = template.Must(template.New("tmpl").Parse(`//kage:unit pixels
+
+package main
+
+var ColorMBody mat4
+var ColorMTranslation vec4
+
+{{if eq .Address .AddressRepeat}}
+func adjustSrcPosForAddressRepeat(p vec2) vec2 {
+	origin := imageSrc0Origin()
+	size := imageSrc0Size()
+	return mod(p - origin, size) + origin
+}
+{{end}}
+
+func Fragment(dstPos vec4, srcPos vec2, color vec4) vec4 {
+{{if eq .Filter .FilterNearest}}
+{{if eq .Address .AddressUnsafe}}
+	clr := imageSrc0UnsafeAt(srcPos)
+{{else if eq .Address .AddressClampToZero}}
+	clr := imageSrc0At(srcPos)
+{{else if eq .Address .AddressRepeat}}
+	clr := imageSrc0At(adjustSrcPosForAddressRepeat(srcPos))
+{{end}}
+{{else}}
+{{if eq .Filter .FilterLinear}}
+	p0 := srcPos - 1/2.0
+	p1 := srcPos + 1/2.0
+{{else if eq .Filter .FilterPixelated}}
+	// inversedScale is the size of the region on the source image.
+	// The size is the inverse of the geometry-matrix scale.
+	inversedScale := vec2(abs(dfdx(srcPos.x)), abs(dfdy(srcPos.y)))
+	// Cap the inversedScale to 1 as dfdx/dfdy is not accurate on some machines (#3182).
+	inversedScale = min(inversedScale, vec2(1))
+	p0 := srcPos - inversedScale/2.0
+	p1 := srcPos + inversedScale/2.0
+{{end}}
+
+{{if eq .Address .AddressRepeat}}
+	p0 = adjustSrcPosForAddressRepeat(p0)
+	p1 = adjustSrcPosForAddressRepeat(p1)
+{{end}}
+
+{{if eq .Address .AddressUnsafe}}
+	c0 := imageSrc0UnsafeAt(p0)
+	c1 := imageSrc0UnsafeAt(vec2(p1.x, p0.y))
+	c2 := imageSrc0UnsafeAt(vec2(p0.x, p1.y))
+	c3 := imageSrc0UnsafeAt(p1)
+{{else}}
+	c0 := imageSrc0At(p0)
+	c1 := imageSrc0At(vec2(p1.x, p0.y))
+	c2 := imageSrc0At(vec2(p0.x, p1.y))
+	c3 := imageSrc0At(p1)
+{{end}}
+
+{{if eq .Filter .FilterLinear}}
+	rate := fract(p1)
+{{else if eq .Filter .FilterPixelated}}
+	rate := clamp(fract(p1)/inversedScale, 0, 1)
+{{end}}
+	clr := mix(mix(c0, c1, rate.x), mix(c2, c3, rate.x), rate.y)
+{{end}}
+
+	// Un-premultiply alpha.
+	// When the alpha is 0, 1-sign(alpha) is 1.0, which means division does nothing.
+	clr.rgb /= clr.a + (1-sign(clr.a))
+	// Apply the clr matrix.
+	clr = (ColorMBody * clr) + ColorMTranslation
+	// Premultiply alpha
+	clr.rgb *= clr.a
+	// Apply the color scale.
+	clr *= color
+	// Clamp the output.
+	clr.rgb = min(clr.rgb, clr.a)
+
+	return clr
+}
+
+`)) // tmpl is the Kage source template rendered by ShaderSource; template.Must panics at package initialization if it fails to parse.
+
+// ShaderSource returns the ColorM shader source for the given filter and address. The source is rendered from tmpl on first use and cached; the cached slice itself is returned. It panics if rendering fails.
+func ShaderSource(filter Filter, address Address) []byte {
+	shadersM.Lock() // serialize access to the shaders cache
+	defer shadersM.Unlock()
+
+	if s := shaders[filter][address]; s != nil { // already rendered: return the cached source
+		return s
+	}
+
+	var buf bytes.Buffer
+	if err := tmpl.Execute(&buf, struct {
+		Filter             Filter
+		FilterNearest      Filter
+		FilterLinear       Filter
+		FilterPixelated    Filter
+		Address            Address
+		AddressUnsafe      Address
+		AddressClampToZero Address
+		AddressRepeat      Address
+	}{
+		Filter:             filter,
+		FilterNearest:      FilterNearest,
+		FilterLinear:       FilterLinear,
+		FilterPixelated:    FilterPixelated,
+		Address:            address,
+		AddressUnsafe:      AddressUnsafe,
+		AddressClampToZero: AddressClampToZero,
+		AddressRepeat:      AddressRepeat,
+	}); err != nil {
+		panic(fmt.Sprintf("colormshader: tmpl.Execute failed: %v", err))
+	}
+
+	b := buf.Bytes()
+	shaders[filter][address] = b // remember the rendered source for later calls
+	return b
+}
diff --git a/internal/graphicscommand/image_test.go b/internal/graphicscommand/image_test.go
index 65982dacb..20089cf38 100644
--- a/internal/graphicscommand/image_test.go
+++ b/internal/graphicscommand/image_test.go
@@ -31,7 +31,7 @@ import (
 var nearestFilterShader *graphicscommand.Shader
 
 func init() {
-	ir, err := graphics.CompileShader([]byte(builtinshader.ShaderSource(builtinshader.FilterNearest, builtinshader.AddressUnsafe, false)))
+	ir, err := graphics.CompileShader([]byte(builtinshader.ShaderSource(builtinshader.FilterNearest, builtinshader.AddressUnsafe)))
 	if err != nil {
 		panic(fmt.Sprintf("graphicscommand: compiling the nearest shader failed: %v", err))
 	}
diff --git a/internal/shaderir/bench_test.go b/internal/shaderir/bench_test.go
index a0c8858d1..d969c0a11 100644
--- a/internal/shaderir/bench_test.go
+++ b/internal/shaderir/bench_test.go
@@ -22,7 +22,7 @@ import (
 )
 
 func BenchmarkFilter(b *testing.B) {
-	src := builtinshader.ShaderSource(builtinshader.FilterNearest, builtinshader.AddressUnsafe, false)
+	src := builtinshader.ShaderSource(builtinshader.FilterNearest, builtinshader.AddressUnsafe)
 	s, err := graphics.CompileShader(src)
 	if err != nil {
 		b.Fatal(err)
diff --git a/shader.go b/shader.go
index ee6bd2510..2b95375a7 100644
--- a/shader.go
+++ b/shader.go
@@ -20,6 +20,7 @@ import (
 	"sync/atomic"
 
 	"github.com/hajimehoshi/ebiten/v2/internal/builtinshader"
+	"github.com/hajimehoshi/ebiten/v2/internal/colormshader"
 	"github.com/hajimehoshi/ebiten/v2/internal/graphics"
 	"github.com/hajimehoshi/ebiten/v2/internal/shaderir"
 	"github.com/hajimehoshi/ebiten/v2/internal/ui"
@@ -122,7 +123,12 @@ func builtinShader(filter builtinshader.Filter, address builtinshader.Address, u
 			shader = &Shader{shader: ui.LinearFilterShader}
 		}
 	} else {
-		src := builtinshader.ShaderSource(filter, address, useColorM)
+		var src []byte
+		if useColorM {
+			src = colormshader.ShaderSource(colormshader.Filter(filter), colormshader.Address(address))
+		} else {
+			src = builtinshader.ShaderSource(filter, address)
+		}
 		var name string
 		switch filter {
 		case builtinshader.FilterNearest: