internal/literals: restrict the use of expensive obfuscators

The added benchmark script shows these numbers for building and running
with vanilla Go on literal sizes between 16B and 2048B, showing that
vanilla Go isn't affected at all by these sizes:

             │      go      │
             │    sec/op    │
Build/16B      118.1m ± ∞ ¹
Build/64B      123.1m ± ∞ ¹
Build/256B     119.7m ± ∞ ¹
Build/1024B    119.1m ± ∞ ¹
Build/2048B    124.3m ± ∞ ¹
Run/16B        1.671m ± ∞ ¹
Run/64B        1.143m ± ∞ ¹
Run/256B       1.190m ± ∞ ¹
Run/1024B      1.222m ± ∞ ¹
Run/2048B      1.080m ± ∞ ¹

Our simple and swap obfuscators scale pretty well to these same sizes,
only causing moderate slow-downs to build and runtime speeds:

             │               simple                │                swap                 │
             │    sec/op          vs base          │    sec/op          vs base          │
Build/16B      268.0m ± ∞ ¹  +126.88% (p=1.000 n=1)   262.0m ± ∞ ¹  +121.80% (p=1.000 n=1)
Build/64B      253.8m ± ∞ ¹  +106.16% (p=1.000 n=1)   252.4m ± ∞ ¹  +105.00% (p=1.000 n=1)
Build/256B     265.4m ± ∞ ¹  +121.78% (p=1.000 n=1)   276.7m ± ∞ ¹  +131.16% (p=1.000 n=1)
Build/1024B    267.4m ± ∞ ¹  +124.44% (p=1.000 n=1)   315.0m ± ∞ ¹  +164.48% (p=1.000 n=1)
Build/2048B    277.4m ± ∞ ¹  +123.11% (p=1.000 n=1)   383.8m ± ∞ ¹  +208.70% (p=1.000 n=1)
Run/16B        1.740m ± ∞ ¹    +4.12% (p=1.000 n=1)   1.463m ± ∞ ¹   -12.47% (p=1.000 n=1)
Run/64B        1.470m ± ∞ ¹   +28.66% (p=1.000 n=1)   1.455m ± ∞ ¹   +27.35% (p=1.000 n=1)
Run/256B       1.729m ± ∞ ¹   +45.25% (p=1.000 n=1)   1.812m ± ∞ ¹   +52.26% (p=1.000 n=1)
Run/1024B      1.315m ± ∞ ¹    +7.62% (p=1.000 n=1)   1.352m ± ∞ ¹   +10.60% (p=1.000 n=1)
Run/2048B      1.425m ± ∞ ¹   +31.93% (p=1.000 n=1)   1.316m ± ∞ ¹   +21.88% (p=1.000 n=1)

However, the other three cause huge slow-downs in both build and
runtime speeds:

             │                   split                    │                  shuffle                   │                    seed                    │
             │      sec/op             vs base            │      sec/op             vs base            │      sec/op             vs base            │
Build/16B         326.6m ± ∞ ¹     +176.53% (p=1.000 n=1)      363.1m ± ∞ ¹     +207.44% (p=1.000 n=1)      217.4m ± ∞ ¹      +84.05% (p=1.000 n=1)
Build/64B         400.6m ± ∞ ¹     +225.34% (p=1.000 n=1)      328.0m ± ∞ ¹     +166.39% (p=1.000 n=1)      262.1m ± ∞ ¹     +112.87% (p=1.000 n=1)
Build/256B        824.7m ± ∞ ¹     +589.08% (p=1.000 n=1)      588.2m ± ∞ ¹     +391.45% (p=1.000 n=1)      873.7m ± ∞ ¹     +630.05% (p=1.000 n=1)
Build/1024B      3257.7m ± ∞ ¹    +2634.84% (p=1.000 n=1)     1671.4m ± ∞ ¹    +1303.15% (p=1.000 n=1)     5000.0m ± ∞ ¹    +4097.53% (p=1.000 n=1)
Build/2048B      5000.0m ± ∞ ¹    +3921.73% (p=1.000 n=1)     3936.4m ± ∞ ¹    +3066.21% (p=1.000 n=1)     5000.0m ± ∞ ¹    +3921.73% (p=1.000 n=1)
Run/16B           1.680m ± ∞ ¹       +0.51% (p=1.000 n=1)      1.426m ± ∞ ¹      -14.67% (p=1.000 n=1)      1.908m ± ∞ ¹      +14.13% (p=1.000 n=1)
Run/64B           1.560m ± ∞ ¹      +36.53% (p=1.000 n=1)      1.345m ± ∞ ¹      +17.74% (p=1.000 n=1)      1.704m ± ∞ ¹      +49.10% (p=1.000 n=1)
Run/256B          2.133m ± ∞ ¹      +79.24% (p=1.000 n=1)      1.833m ± ∞ ¹      +53.98% (p=1.000 n=1)      1.838m ± ∞ ¹      +54.40% (p=1.000 n=1)
Run/1024B         2.863m ± ∞ ¹     +134.21% (p=1.000 n=1)      1.786m ± ∞ ¹      +46.12% (p=1.000 n=1)   5000.000m ± ∞ ¹  +408975.92% (p=1.000 n=1)
Run/2048B      5000.000m ± ∞ ¹  +462837.24% (p=1.000 n=1)      2.900m ± ∞ ¹     +168.54% (p=1.000 n=1)   5000.000m ± ∞ ¹  +462837.24% (p=1.000 n=1)

As such, limit the scope of when we apply our obfuscators in two ways.

First, always apply a limit of 2048 bytes for all obfuscators.
As we can see above, the two cheap obfuscators still add some overhead,
and we aren't testing what happens if they run on truly huge strings.
It's likely that they will still cause unexpected slow-down.

Second, split the obfuscators along this line and call them
"cheap" versus "expensive". The expensive ones are only used
for sizes of up to 256 bytes. We still measure moderate slow-downs
in build times of 400-600%, but this is a reasonable compromise for now.
We will be filing bugs with upstream Go about the compiler overhead.

We adjust generateLiterals accordingly; it now generates 100 literals
between MinSize and MaxSize, which must be obfuscated in some way,
and 5 literals past MaxSize, which don't need to be obfuscated,
to check for issues like crashes.

Fixes #928.
2026-04-22 15:47:04 +08:00 · 2026-04-14 08:13:41 +01:00
parent 626c8c86ee
commit ef2385ee97
5 changed files with 40 additions and 45 deletions
@@ -45,7 +45,7 @@ func FuzzObfuscate(f *testing.F) {
 	f.Add("long_enough_string", initialRandSeed)
 	f.Add("binary_\x00\x01\x02", initialRandSeed)
 	f.Add("whitespace    \n\t\t", initialRandSeed)
-	f.Add(strings.Repeat("x", (2<<10)+1), initialRandSeed) // past maxSize
+	f.Add(strings.Repeat("x", (2<<10)+1), initialRandSeed) // past MaxSize

 	tdir := f.TempDir()
 	var tdirCounter atomic.Int64
@@ -21,10 +21,12 @@ import (
 // moderate, this also decreases the likelihood for performance slowdowns.
 const MinSize = 8

-// maxSize is the upper limit of the size of string-like literals
-// which we will obfuscate with any of the available obfuscators.
-// Beyond that we apply only a subset of obfuscators which are guaranteed to run efficiently.
-const maxSize = 2 << 10 // KiB
+// MaxSize is the upper limit of the size of string-like literals we will obfuscate.
+const MaxSize = 2 << 10 // 2 KiB
+
+// MaxSizeExpensive is the upper limit for using expensive obfuscators (split, shuffle, seed).
+// Above this size, only cheap obfuscators are used.
+const MaxSizeExpensive = 256

 const (
 	// minStringJunkBytes defines the minimum number of junk bytes to prepend or append during string obfuscation.
@@ -83,7 +85,7 @@ func Obfuscate(rand *mathrand.Rand, file *ast.File, info *types.Info, linkString

 		if typeAndValue.Type == types.Typ[types.String] && typeAndValue.Value != nil {
 			value := constant.StringVal(typeAndValue.Value)
-			if len(value) < MinSize {
+			if len(value) < MinSize || len(value) > MaxSize {
 				return true
 			}

@@ -143,7 +145,7 @@ func Obfuscate(rand *mathrand.Rand, file *ast.File, info *types.Info, linkString
 //
 // If the input node cannot be obfuscated nil is returned.
 func handleCompositeLiteral(obfRand *obfRand, isPointer bool, node *ast.CompositeLit, info *types.Info) ast.Node {
-	if len(node.Elts) < MinSize {
+	if len(node.Elts) < MinSize || len(node.Elts) > MaxSize {
 		return nil
 	}

@@ -357,9 +359,11 @@ func obfuscateByteArray(obfRand *obfRand, isPointer bool, data []byte, length in
 }

 func getNextObfuscator(obfRand *obfRand, size int) obfuscator {
-	if size <= maxSize {
+	if size < MinSize || size > MaxSize {
+		panic(fmt.Sprintf("getNextObfuscator called with size %d outside [%d, %d]", size, MinSize, MaxSize))
+	}
+	if size <= MaxSizeExpensive {
 		return obfRand.nextObfuscator()
-	} else {
-		return obfRand.nextLinearTimeObfuscator()
 	}
+	return obfRand.nextCheapObfuscator()
 }
@@ -60,20 +60,20 @@ type obfuscator interface {
 }

 var (
-	simpleObfuscator = simple{}
-
-	// Obfuscators contains all types which implement the obfuscator Interface
+	// Obfuscators contains all types which implement the obfuscator Interface.
 	Obfuscators = []obfuscator{
-		simpleObfuscator,
+		simple{},
 		swap{},
 		split{},
 		shuffle{},
 		seed{},
 	}

-	// LinearTimeObfuscators contains all types which implement the obfuscator Interface and can safely be used on large literals
-	LinearTimeObfuscators = []obfuscator{
-		simpleObfuscator,
+	// CheapObfuscators contains obfuscators safe to use on large literals.
+	// The expensive obfuscators scale poorly, so they are excluded here.
+	CheapObfuscators = []obfuscator{
+		simple{},
+		swap{},
 	}

 	TestObfuscator         string
@@ -268,11 +268,11 @@ func (r *obfRand) nextObfuscator() obfuscator {
 	return Obfuscators[r.Intn(len(Obfuscators))]
 }

-func (r *obfRand) nextLinearTimeObfuscator() obfuscator {
+func (r *obfRand) nextCheapObfuscator() obfuscator {
 	if r.testObfuscator != nil {
 		return r.testObfuscator
 	}
-	return LinearTimeObfuscators[r.Intn(len(LinearTimeObfuscators))]
+	return CheapObfuscators[r.Intn(len(CheapObfuscators))]
 }

 func newObfRand(rand *mathrand.Rand, file *ast.File, nameFunc NameProviderFunc) *obfRand {
@@ -26,6 +26,7 @@ import (
 	"github.com/rogpeppe/go-internal/testscript"

 	ah "mvdan.cc/garble/internal/asthelper"
+	"mvdan.cc/garble/internal/literals"
 )

 var proxyURL string
@@ -248,23 +249,10 @@ func bincmp(ts *testscript.TestScript, neg bool, args []string) {

 var testRand = mathrand.New(mathrand.NewSource(time.Now().UnixNano()))

-func generateStringLit(minSize int) *ast.BasicLit {
-	buffer := make([]byte, minSize)
-	_, err := testRand.Read(buffer)
-	if err != nil {
-		panic(err)
-	}
+const uniqueLitString = "garble_unique_string"

-	return ah.StringLit(string(buffer) + "a_unique_string_that_is_part_of_all_extra_literals")
-}
-
-// generateLiterals creates a new source code file with a few random literals inside.
-// All literals contain the string "a_unique_string_that_is_part_of_all_extra_literals"
-// so we can later check if they are all obfuscated by looking for this substring.
-// The code is designed such that the Go compiler does not optimize away the literals,
-// which would destroy the test.
-// This is achieved by defining a global variable `var x = ""` and an `init` function
-// which appends all literals to `x`.
+// generateLiterals creates a source file with random string literals appended
+// to a global var in init, preventing the compiler from optimizing them away.
 func generateLiterals(ts *testscript.TestScript, neg bool, args []string) {
 	if neg {
 		ts.Fatalf("unsupported: ! generate-literals")
@@ -290,29 +278,32 @@ func generateLiterals(ts *testscript.TestScript, neg bool, args []string) {

 	var statements []ast.Stmt

-	// Assignments which append 100 random small literals to x: `x += "the_small_random_literal"`
+	// 100 literals up to MaxSize, all containing uniqueLitString.
 	for range 100 {
+		randSize := testRand.Intn(literals.MaxSize - len(uniqueLitString) + 1)
+		buffer := make([]byte, randSize)
+		testRand.Read(buffer)
 		statements = append(
 			statements,
 			&ast.AssignStmt{
 				Lhs: []ast.Expr{ast.NewIdent("x")},
 				Tok: token.ADD_ASSIGN,
-				Rhs: []ast.Expr{generateStringLit(1 + testRand.Intn(255))},
+				Rhs: []ast.Expr{ah.StringLit(string(buffer) + uniqueLitString)},
 			},
 		)
 	}

-	// Assignments which append 5 random huge literals to x: `x += "the_huge_random_literal"`
-	// We add huge literals to make sure we obfuscate them fast.
-	// 5 * 128KiB is large enough that it would take a very, very long time
-	// to obfuscate those literals if too complex obfuscators are used.
+	// 5 huge literals past MaxSize, without uniqueLitString; not obfuscated.
 	for range 5 {
+		size := literals.MaxSize + 1 + testRand.Intn(128<<10)
+		buffer := make([]byte, size)
+		testRand.Read(buffer)
 		statements = append(
 			statements,
 			&ast.AssignStmt{
 				Lhs: []ast.Expr{ast.NewIdent("x")},
 				Tok: token.ADD_ASSIGN,
-				Rhs: []ast.Expr{generateStringLit(128 << 10)},
+				Rhs: []ast.Expr{ah.StringLit(string(buffer))},
 			},
 		)
 	}
@@ -27,13 +27,13 @@ generate-literals extra_literals.go

 # ensure we find the extra literals in an unobfuscated build
 go build
-binsubstr main$exe 'a_unique_string_that_is_part_of_all_extra_literals'
+binsubstr main$exe 'garble_unique_string'

 # ensure we don't find the extra literals in an obfuscated build
 exec garble -literals -debugdir=debug1 build
 exec ./main$exe
 cmp stderr main.stderr
-! binsubstr main$exe 'a_unique_string_that_is_part_of_all_extra_literals'
+! binsubstr main$exe 'garble_unique_string'

 # Check obfuscators.