mirror of
https://github.com/burrowers/garble.git
synced 2026-04-22 15:47:04 +08:00
ef2385ee97
The added benchmark script reports these numbers for building and running
with vanilla Go on literal sizes between 16B and 2048B,
showing that vanilla Go isn't affected at all by these sizes:
│ go │
│ sec/op │
Build/16B 118.1m ± ∞ ¹
Build/64B 123.1m ± ∞ ¹
Build/256B 119.7m ± ∞ ¹
Build/1024B 119.1m ± ∞ ¹
Build/2048B 124.3m ± ∞ ¹
Run/16B 1.671m ± ∞ ¹
Run/64B 1.143m ± ∞ ¹
Run/256B 1.190m ± ∞ ¹
Run/1024B 1.222m ± ∞ ¹
Run/2048B 1.080m ± ∞ ¹
Our simple and swap obfuscators scale pretty well to these same sizes,
only causing moderate slow-downs to build and runtime speeds:
│ simple │ swap │
│ sec/op vs base │ sec/op vs base │
Build/16B 268.0m ± ∞ ¹ +126.88% (p=1.000 n=1) 262.0m ± ∞ ¹ +121.80% (p=1.000 n=1)
Build/64B 253.8m ± ∞ ¹ +106.16% (p=1.000 n=1) 252.4m ± ∞ ¹ +105.00% (p=1.000 n=1)
Build/256B 265.4m ± ∞ ¹ +121.78% (p=1.000 n=1) 276.7m ± ∞ ¹ +131.16% (p=1.000 n=1)
Build/1024B 267.4m ± ∞ ¹ +124.44% (p=1.000 n=1) 315.0m ± ∞ ¹ +164.48% (p=1.000 n=1)
Build/2048B 277.4m ± ∞ ¹ +123.11% (p=1.000 n=1) 383.8m ± ∞ ¹ +208.70% (p=1.000 n=1)
Run/16B 1.740m ± ∞ ¹ +4.12% (p=1.000 n=1) 1.463m ± ∞ ¹ -12.47% (p=1.000 n=1)
Run/64B 1.470m ± ∞ ¹ +28.66% (p=1.000 n=1) 1.455m ± ∞ ¹ +27.35% (p=1.000 n=1)
Run/256B 1.729m ± ∞ ¹ +45.25% (p=1.000 n=1) 1.812m ± ∞ ¹ +52.26% (p=1.000 n=1)
Run/1024B 1.315m ± ∞ ¹ +7.62% (p=1.000 n=1) 1.352m ± ∞ ¹ +10.60% (p=1.000 n=1)
Run/2048B 1.425m ± ∞ ¹ +31.93% (p=1.000 n=1) 1.316m ± ∞ ¹ +21.88% (p=1.000 n=1)
However, the other three cause huge slow-downs in both build and runtime speeds:
│ split │ shuffle │ seed │
│ sec/op vs base │ sec/op vs base │ sec/op vs base │
Build/16B 326.6m ± ∞ ¹ +176.53% (p=1.000 n=1) 363.1m ± ∞ ¹ +207.44% (p=1.000 n=1) 217.4m ± ∞ ¹ +84.05% (p=1.000 n=1)
Build/64B 400.6m ± ∞ ¹ +225.34% (p=1.000 n=1) 328.0m ± ∞ ¹ +166.39% (p=1.000 n=1) 262.1m ± ∞ ¹ +112.87% (p=1.000 n=1)
Build/256B 824.7m ± ∞ ¹ +589.08% (p=1.000 n=1) 588.2m ± ∞ ¹ +391.45% (p=1.000 n=1) 873.7m ± ∞ ¹ +630.05% (p=1.000 n=1)
Build/1024B 3257.7m ± ∞ ¹ +2634.84% (p=1.000 n=1) 1671.4m ± ∞ ¹ +1303.15% (p=1.000 n=1) 5000.0m ± ∞ ¹ +4097.53% (p=1.000 n=1)
Build/2048B 5000.0m ± ∞ ¹ +3921.73% (p=1.000 n=1) 3936.4m ± ∞ ¹ +3066.21% (p=1.000 n=1) 5000.0m ± ∞ ¹ +3921.73% (p=1.000 n=1)
Run/16B 1.680m ± ∞ ¹ +0.51% (p=1.000 n=1) 1.426m ± ∞ ¹ -14.67% (p=1.000 n=1) 1.908m ± ∞ ¹ +14.13% (p=1.000 n=1)
Run/64B 1.560m ± ∞ ¹ +36.53% (p=1.000 n=1) 1.345m ± ∞ ¹ +17.74% (p=1.000 n=1) 1.704m ± ∞ ¹ +49.10% (p=1.000 n=1)
Run/256B 2.133m ± ∞ ¹ +79.24% (p=1.000 n=1) 1.833m ± ∞ ¹ +53.98% (p=1.000 n=1) 1.838m ± ∞ ¹ +54.40% (p=1.000 n=1)
Run/1024B 2.863m ± ∞ ¹ +134.21% (p=1.000 n=1) 1.786m ± ∞ ¹ +46.12% (p=1.000 n=1) 5000.000m ± ∞ ¹ +408975.92% (p=1.000 n=1)
Run/2048B 5000.000m ± ∞ ¹ +462837.24% (p=1.000 n=1) 2.900m ± ∞ ¹ +168.54% (p=1.000 n=1) 5000.000m ± ∞ ¹ +462837.24% (p=1.000 n=1)
As such, limit the scope of when we apply our obfuscators in two ways.
First, always apply a limit of 2048 bytes for all obfuscators.
As we can see above, the two cheap obfuscators still add some overhead,
and we aren't testing what happens if they run on truly huge strings.
It's likely that they will still cause unexpected slow-down.
Second, split the obfuscators along this line and call them
"cheap" versus "expensive". The expensive ones are only used
for sizes of up to 256 bytes. We still measure moderate slow-downs
in build times of 400-600%, but this is a reasonable compromise for now.
We will be filing bugs with upstream Go about the compiler overhead.
We adjust generateLiterals accordingly;
it now generates 100 literals between MinSize and MaxSize,
which must be obfuscated in some way, and 5 literals past MaxSize,
which don't need to be obfuscated, to check for issues like crashes.
Fixes #928.
370 lines
10 KiB
Go
370 lines
10 KiB
Go
// Copyright (c) 2020, The Garble Authors.
|
|
// See LICENSE for licensing information.
|
|
|
|
package literals
|
|
|
|
import (
|
|
"fmt"
|
|
"go/ast"
|
|
"go/constant"
|
|
"go/token"
|
|
"go/types"
|
|
mathrand "math/rand"
|
|
"strings"
|
|
|
|
"golang.org/x/tools/go/ast/astutil"
|
|
ah "mvdan.cc/garble/internal/asthelper"
|
|
)
|
|
|
|
// Size limits, in bytes or elements, for the string-like literals we obfuscate.
const (
	// MinSize is the smallest string-like literal we will obfuscate.
	// Skipping shorter literals keeps binary size relatively moderate
	// and also decreases the likelihood of performance slowdowns.
	MinSize = 8

	// MaxSize is the largest string-like literal we will obfuscate.
	MaxSize = 2 << 10 // 2 KiB

	// MaxSizeExpensive is the largest literal for which the expensive
	// obfuscators (split, seed) may be picked.
	// Above this size, only cheap obfuscators are used.
	//
	// NOTE(review): confirm whether shuffle also counts as expensive;
	// the obfuscator lists are declared outside this section.
	MaxSizeExpensive = 256
)

// Bounds on the amount of junk mixed into a string before it is obfuscated.
const (
	// minStringJunkBytes is the minimum number of junk bytes to prepend or append during string obfuscation.
	minStringJunkBytes = 2
	// maxStringJunkBytes is the maximum number of junk bytes to prepend or append during string obfuscation.
	maxStringJunkBytes = 8
)

// NameProviderFunc is the signature of a callback that produces a name
// from a random source and a base name. Obfuscate receives one from its
// caller and forwards it when setting up its obfuscation state.
type NameProviderFunc func(rand *mathrand.Rand, baseName string) string
|
|
|
|
// Obfuscate replaces literals with obfuscated anonymous functions.
|
|
func Obfuscate(rand *mathrand.Rand, file *ast.File, info *types.Info, linkStrings map[*types.Var]string, nameFunc NameProviderFunc) *ast.File {
|
|
obfRand := newObfRand(rand, file, nameFunc)
|
|
pre := func(cursor *astutil.Cursor) bool {
|
|
switch node := cursor.Node().(type) {
|
|
case *ast.FuncDecl:
|
|
// Obfuscating literals can push the stack frame over the //go:nosplit limit,
|
|
// which is just 800 bytes. These funcs are mostly in the runtime,
|
|
// so obfuscating strings in these is less important in any case.
|
|
if node.Doc != nil {
|
|
for _, comment := range node.Doc.List {
|
|
if strings.HasPrefix(comment.Text, "//go:nosplit") {
|
|
return false
|
|
}
|
|
}
|
|
}
|
|
case *ast.GenDecl:
|
|
// constants are obfuscated by replacing all references with the obfuscated value
|
|
if node.Tok == token.CONST {
|
|
return false
|
|
}
|
|
case *ast.ValueSpec:
|
|
for _, name := range node.Names {
|
|
obj := info.Defs[name].(*types.Var)
|
|
if _, e := linkStrings[obj]; e {
|
|
// Skip this entire ValueSpec to not break -ldflags=-X.
|
|
// TODO: support obfuscating those injected strings, too.
|
|
return false
|
|
}
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
post := func(cursor *astutil.Cursor) bool {
|
|
node, ok := cursor.Node().(ast.Expr)
|
|
if !ok {
|
|
return true
|
|
}
|
|
|
|
typeAndValue := info.Types[node]
|
|
if !typeAndValue.IsValue() {
|
|
return true
|
|
}
|
|
|
|
if typeAndValue.Type == types.Typ[types.String] && typeAndValue.Value != nil {
|
|
value := constant.StringVal(typeAndValue.Value)
|
|
if len(value) < MinSize || len(value) > MaxSize {
|
|
return true
|
|
}
|
|
|
|
cursor.Replace(withPos(obfuscateString(obfRand, value), node.Pos()))
|
|
|
|
return true
|
|
}
|
|
|
|
switch node := node.(type) {
|
|
case *ast.UnaryExpr:
|
|
// Account for the possibility of address operators like
|
|
// &[]byte used inline with function arguments.
|
|
//
|
|
// See issue #520.
|
|
|
|
if node.Op != token.AND {
|
|
return true
|
|
}
|
|
|
|
if child, ok := node.X.(*ast.CompositeLit); ok {
|
|
newnode := handleCompositeLiteral(obfRand, true, child, info)
|
|
if newnode != nil {
|
|
cursor.Replace(newnode)
|
|
}
|
|
}
|
|
|
|
case *ast.CompositeLit:
|
|
// We replaced the &[]byte{...} case above. Here we account for the
|
|
// standard []byte{...} or [4]byte{...} value form.
|
|
//
|
|
// We need two separate calls to cursor.Replace, as it only supports
|
|
// replacing the node we're currently visiting, and the pointer variant
|
|
// requires us to move the ampersand operator.
|
|
|
|
parent, ok := cursor.Parent().(*ast.UnaryExpr)
|
|
if ok && parent.Op == token.AND {
|
|
return true
|
|
}
|
|
|
|
newnode := handleCompositeLiteral(obfRand, false, node, info)
|
|
if newnode != nil {
|
|
cursor.Replace(newnode)
|
|
}
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
newFile := astutil.Apply(file, pre, post).(*ast.File)
|
|
obfRand.proxyDispatcher.AddToFile(newFile)
|
|
return newFile
|
|
}
|
|
|
|
// handleCompositeLiteral checks if the input node is []byte or [...]byte and
|
|
// calls the appropriate obfuscation method, returning a new node that should
|
|
// be used to replace it.
|
|
//
|
|
// If the input node cannot be obfuscated nil is returned.
|
|
func handleCompositeLiteral(obfRand *obfRand, isPointer bool, node *ast.CompositeLit, info *types.Info) ast.Node {
|
|
if len(node.Elts) < MinSize || len(node.Elts) > MaxSize {
|
|
return nil
|
|
}
|
|
|
|
byteType := types.Universe.Lookup("byte").Type()
|
|
|
|
var arrayLen int64
|
|
switch y := info.TypeOf(node.Type).(type) {
|
|
case *types.Array:
|
|
if y.Elem() != byteType {
|
|
return nil
|
|
}
|
|
|
|
arrayLen = y.Len()
|
|
|
|
case *types.Slice:
|
|
if y.Elem() != byteType {
|
|
return nil
|
|
}
|
|
|
|
default:
|
|
return nil
|
|
}
|
|
|
|
data := make([]byte, 0, len(node.Elts))
|
|
|
|
for _, el := range node.Elts {
|
|
elType := info.Types[el]
|
|
|
|
if elType.Value == nil || elType.Value.Kind() != constant.Int {
|
|
return nil
|
|
}
|
|
|
|
value, ok := constant.Uint64Val(elType.Value)
|
|
if !ok {
|
|
panic(fmt.Sprintf("cannot parse byte value: %v", elType.Value))
|
|
}
|
|
|
|
data = append(data, byte(value))
|
|
}
|
|
|
|
if arrayLen > 0 {
|
|
return withPos(obfuscateByteArray(obfRand, isPointer, data, arrayLen), node.Pos())
|
|
}
|
|
|
|
return withPos(obfuscateByteSlice(obfRand, isPointer, data), node.Pos())
|
|
}
|
|
|
|
// withPos overwrites, for every node in the tree rooted at node, the
// token.Pos fields which affect printing, setting them to pos.
// It returns the same node it was given.
// Not every token.Pos field can be set, since some affect the semantics.
//
// This is useful so that go/printer doesn't try to estimate position
// offsets, which can end up in printing comment directives too early.
//
// No "end" or middle positions are set, because they seem irrelevant.
func withPos(node ast.Node, pos token.Pos) ast.Node {
	ast.Inspect(node, func(n ast.Node) bool {
		switch n := n.(type) {
		// Expressions and types.
		case *ast.BasicLit:
			n.ValuePos = pos
		case *ast.Ident:
			n.NamePos = pos
		case *ast.CompositeLit:
			n.Lbrace, n.Rbrace = pos, pos
		case *ast.ArrayType:
			n.Lbrack = pos
		case *ast.FuncType:
			n.Func = pos
		case *ast.BinaryExpr:
			n.OpPos = pos
		case *ast.StarExpr:
			n.Star = pos
		case *ast.CallExpr:
			n.Lparen, n.Rparen = pos, pos

		// Declarations and statements.
		case *ast.GenDecl:
			n.TokPos = pos
		case *ast.ReturnStmt:
			n.Return = pos
		case *ast.ForStmt:
			n.For = pos
		case *ast.RangeStmt:
			n.For = pos
		case *ast.BranchStmt:
			n.TokPos = pos
		}
		// Always keep descending; every node in the tree is visited.
		return true
	})
	return node
}
|
|
|
|
func obfuscateString(obfRand *obfRand, data string) *ast.CallExpr {
|
|
obf := getNextObfuscator(obfRand, len(data))
|
|
|
|
// Generate junk bytes to to prepend and append to the data.
|
|
// This is to prevent the obfuscated string from being easily fingerprintable.
|
|
junkBytes := make([]byte, obfRand.Intn(maxStringJunkBytes-minStringJunkBytes)+minStringJunkBytes)
|
|
obfRand.Read(junkBytes)
|
|
splitIdx := obfRand.Intn(len(junkBytes))
|
|
|
|
extKeys := randExtKeys(obfRand.Rand)
|
|
|
|
plainData := []byte(data)
|
|
plainDataWithJunkBytes := append(append(junkBytes[:splitIdx], plainData...), junkBytes[splitIdx:]...)
|
|
|
|
block := obf.obfuscate(obfRand.Rand, plainDataWithJunkBytes, extKeys)
|
|
params, args := extKeysToParams(obfRand, extKeys)
|
|
|
|
// Generate unique cast bytes to string function and hide it using proxyDispatcher:
|
|
//
|
|
// func(x []byte) string {
|
|
// return string(x[<splitIdx>:<splitIdx+len(plainData)>])
|
|
// }
|
|
funcTyp := &ast.FuncType{
|
|
Params: &ast.FieldList{List: []*ast.Field{{
|
|
Type: ah.ByteSliceType(),
|
|
}}},
|
|
Results: &ast.FieldList{List: []*ast.Field{{
|
|
Type: ast.NewIdent("string"),
|
|
}}},
|
|
}
|
|
funcVal := &ast.FuncLit{
|
|
Type: &ast.FuncType{
|
|
Params: &ast.FieldList{List: []*ast.Field{{
|
|
Names: []*ast.Ident{ast.NewIdent("x")},
|
|
Type: ah.ByteSliceType(),
|
|
}}},
|
|
Results: &ast.FieldList{List: []*ast.Field{{
|
|
Type: ast.NewIdent("string"),
|
|
}}},
|
|
},
|
|
Body: ah.BlockStmt(
|
|
ah.ReturnStmt(
|
|
ah.CallExprByName("string",
|
|
&ast.SliceExpr{
|
|
X: ast.NewIdent("x"),
|
|
Low: ah.IntLit(splitIdx),
|
|
High: ah.IntLit(splitIdx + len(plainData)),
|
|
},
|
|
),
|
|
),
|
|
),
|
|
}
|
|
block.List = append(block.List, ah.ReturnStmt(ah.CallExpr(obfRand.proxyDispatcher.HideValue(funcVal, funcTyp), ast.NewIdent("data"))))
|
|
return ah.LambdaCall(params, ast.NewIdent("string"), block, args)
|
|
}
|
|
|
|
func obfuscateByteSlice(obfRand *obfRand, isPointer bool, data []byte) *ast.CallExpr {
|
|
obf := getNextObfuscator(obfRand, len(data))
|
|
|
|
extKeys := randExtKeys(obfRand.Rand)
|
|
block := obf.obfuscate(obfRand.Rand, data, extKeys)
|
|
params, args := extKeysToParams(obfRand, extKeys)
|
|
|
|
if isPointer {
|
|
block.List = append(block.List, ah.ReturnStmt(
|
|
ah.UnaryExpr(token.AND, ast.NewIdent("data")),
|
|
))
|
|
return ah.LambdaCall(params, ah.StarExpr(ah.ByteSliceType()), block, args)
|
|
}
|
|
|
|
block.List = append(block.List, ah.ReturnStmt(ast.NewIdent("data")))
|
|
return ah.LambdaCall(params, ah.ByteSliceType(), block, args)
|
|
}
|
|
|
|
func obfuscateByteArray(obfRand *obfRand, isPointer bool, data []byte, length int64) *ast.CallExpr {
|
|
obf := getNextObfuscator(obfRand, len(data))
|
|
|
|
extKeys := randExtKeys(obfRand.Rand)
|
|
block := obf.obfuscate(obfRand.Rand, data, extKeys)
|
|
params, args := extKeysToParams(obfRand, extKeys)
|
|
|
|
arrayType := ah.ByteArrayType(length)
|
|
|
|
sliceToArray := []ast.Stmt{
|
|
&ast.DeclStmt{
|
|
Decl: &ast.GenDecl{
|
|
Tok: token.VAR,
|
|
Specs: []ast.Spec{&ast.ValueSpec{
|
|
Names: []*ast.Ident{ast.NewIdent("newdata")},
|
|
Type: arrayType,
|
|
}},
|
|
},
|
|
},
|
|
&ast.RangeStmt{
|
|
Key: ast.NewIdent("i"),
|
|
Tok: token.DEFINE,
|
|
X: ast.NewIdent("data"),
|
|
Body: ah.BlockStmt(
|
|
ah.AssignStmt(
|
|
ah.IndexExprByExpr(ast.NewIdent("newdata"), ast.NewIdent("i")),
|
|
ah.IndexExprByExpr(ast.NewIdent("data"), ast.NewIdent("i")),
|
|
),
|
|
),
|
|
},
|
|
}
|
|
|
|
var retexpr ast.Expr = ast.NewIdent("newdata")
|
|
if isPointer {
|
|
retexpr = ah.UnaryExpr(token.AND, retexpr)
|
|
}
|
|
|
|
sliceToArray = append(sliceToArray, ah.ReturnStmt(retexpr))
|
|
block.List = append(block.List, sliceToArray...)
|
|
|
|
if isPointer {
|
|
return ah.LambdaCall(params, ah.StarExpr(arrayType), block, args)
|
|
}
|
|
|
|
return ah.LambdaCall(params, arrayType, block, args)
|
|
}
|
|
|
|
func getNextObfuscator(obfRand *obfRand, size int) obfuscator {
|
|
if size < MinSize || size > MaxSize {
|
|
panic(fmt.Sprintf("getNextObfuscator called with size %d outside [%d, %d]", size, MinSize, MaxSize))
|
|
}
|
|
if size <= MaxSizeExpensive {
|
|
return obfRand.nextObfuscator()
|
|
}
|
|
return obfRand.nextCheapObfuscator()
|
|
}
|