From 8c4dfe941be38b2bd79d102c51a3ac25a6ff4586 Mon Sep 17 00:00:00 2001 From: "Bryan C. Mills" Date: Fri, 24 Apr 2020 21:47:31 -0400 Subject: [PATCH] unsafeslice: add OfString and AsString to convert between strings and slices These functions, unlike most other variations of the same logic I have seen in the wild, make a serious attempt to detect and report erroneous mutations, especially when the race detector is available. (Go programs can and do assume that strings are immutable, and not even the unsafeslice package should undermine that invariant.) --- fnvhash.go | 16 ++++ internal/eventually/eventually.go | 40 ++++++++++ maphash.go | 27 +++++++ norace.go | 9 +++ race.go | 9 +++ unsafeslice.go | 117 ++++++++++++++++++++++++++++++ unsafeslice_test.go | 109 +++++++++++++++++++++++++++- 7 files changed, 326 insertions(+), 1 deletion(-) create mode 100644 fnvhash.go create mode 100644 internal/eventually/eventually.go create mode 100644 maphash.go create mode 100644 norace.go create mode 100644 race.go diff --git a/fnvhash.go b/fnvhash.go new file mode 100644 index 0000000..fcc43d4 --- /dev/null +++ b/fnvhash.go @@ -0,0 +1,16 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !go1.14 + +package unsafeslice + +import ( + "hash" + "hash/fnv" +) + +func newHash() hash.Hash64 { + return fnv.New64a() +} diff --git a/internal/eventually/eventually.go b/internal/eventually/eventually.go new file mode 100644 index 0000000..7d25d52 --- /dev/null +++ b/internal/eventually/eventually.go @@ -0,0 +1,40 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package eventually enables the use of finalizers whose registration can be +// blocked until an arbitrary point in the program. +package eventually + +import ( + "runtime" +) + +var unblocked = make(chan struct{}) + +func init() { + close(unblocked) +} + +// Block delays finalizer registration until unblock is called. +func Block() (unblock func()) { + c := make(chan struct{}) + unblocked = c + return func() { close(c) } +} + +// SetFinalizer sets a finalizer f for pointer p. +// +// If registration is currently blocked, SetFinalizer registers it in a +// background goroutine that first waits for registration to be unblocked. +func SetFinalizer(p, f interface{}) { + select { + case <-unblocked: + runtime.SetFinalizer(p, f) + default: + go func() { + <-unblocked + runtime.SetFinalizer(p, f) + }() + } +} diff --git a/maphash.go b/maphash.go new file mode 100644 index 0000000..fd0d986 --- /dev/null +++ b/maphash.go @@ -0,0 +1,27 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build go1.14 + +package unsafeslice + +import ( + "hash/maphash" + "sync" +) + +var seed struct { + once sync.Once + seed maphash.Seed +} + +func newHash() *maphash.Hash { + seed.once.Do(func() { + seed.seed = maphash.MakeSeed() + }) + + h := new(maphash.Hash) + h.SetSeed(seed.seed) + return h +} diff --git a/norace.go b/norace.go new file mode 100644 index 0000000..3208dc3 --- /dev/null +++ b/norace.go @@ -0,0 +1,9 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !race + +package unsafeslice + +const raceEnabled = false diff --git a/race.go b/race.go new file mode 100644 index 0000000..d3e96a2 --- /dev/null +++ b/race.go @@ -0,0 +1,9 @@ +// Copyright 2020 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build race + +package unsafeslice + +const raceEnabled = true diff --git a/unsafeslice.go b/unsafeslice.go index 1112082..e932d97 100644 --- a/unsafeslice.go +++ b/unsafeslice.go @@ -9,7 +9,10 @@ package unsafeslice import ( "fmt" "reflect" + "sync/atomic" "unsafe" + + "github.com/bcmills/unsafeslice/internal/eventually" ) // SetAt sets dst, which must be a non-nil pointer to a variable of a slice @@ -99,3 +102,117 @@ func ConvertAt(dst, src interface{}) { hdr.Cap = int(dstCap) hdr.Len = int(dstLen) } + +// OfString returns a slice that refers to the data backing the string s. +// +// The caller must ensure that the contents of the slice are never mutated. +// +// Programs that use unsafeslice.OfString should be tested under the race +// detector to flag erroneous mutations. +func OfString(s string) []byte { + p := unsafe.Pointer((*reflect.StringHeader)(unsafe.Pointer(&s)).Data) + + var b []byte + hdr := (*reflect.SliceHeader)(unsafe.Pointer(&b)) + hdr.Data = uintptr(p) + hdr.Cap = len(s) + hdr.Len = len(s) + + maybeDetectMutations(b) + return b +} + +// AsString returns a string that refers to the data backing the slice s. +// +// The caller must ensure that the contents of the slice are never again +// mutated, and that its memory either is managed by the Go garbage collector or +// remains valid for the remainder of this process's lifetime. +// +// Programs that use unsafeslice.AsString should be tested under the race +// detector to flag erroneous mutations. +func AsString(b []byte) string { + if len(b) == 0 { + return "" + } + p := unsafe.Pointer(&b[0]) + + var s string + hdr := (*reflect.StringHeader)(unsafe.Pointer(&s)) + hdr.Data = uintptr(p) + hdr.Len = len(b) + + maybeDetectMutations(b) + return s +} + +// maybeDetectMutations makes a best effort to detect mutations and lifetime +// errors on the slice b. It is most effective when run under the race detector. +func maybeDetectMutations(b []byte) { + if safetyReduced() || len(b) == 0 { + return + } + + checksum := new(uint64) + + h := newHash() + h.Write(b) + *checksum = h.Sum64() + + if raceEnabled { + // Start a goroutine that reads from the slice and does not have a + // happens-before relationship with any other event in the program. + // + // Even if the goroutine is scheduled and runs to completion immediately, if + // anything ever mutates the slice the race detector should report it as a + // read/write race. The erroneous writer should be easy to identify from the + // race report. + go func() { + h := newHash() + h.Write(b) + if *checksum != h.Sum64() { + panic(fmt.Sprintf("mutation detected in string at address 0x%012x", &b[0])) + } + }() + } + + // We can't set a finalizer on the slice contents itself, since we don't know + // how it was allocated (or even whether it is owned by the Go runtime). + // Instead, we use a finalizer on the checksum allocation to make a best + // effort to re-check the hash at some arbitrary future point in time. + // + // This attempts to produce a longer delay than scheduling a goroutine + // immediately, in order to catch more mutations, but only extends the + // lifetimes of allocated strings to the next GC cycle rather than by an + // arbitrary time interval. + // + // However, because the lifetime of p is not tied to the lifetime of the + // backing data in any way, this approach could backfire and run the check + // much too early — before a dangerous mutation has even occurred. It's better + // than nothing, but not an adequate substitute for the race-enabled version + // of this check. + eventually.SetFinalizer(checksum, func(checksum *uint64) { + h := newHash() + h.Write(b) + if *checksum != h.Sum64() { + panic(fmt.Sprintf("mutation detected in string at address 0x%012x", &b[0])) + } + }) +} + +// ReduceSafety may make the unsafeslice package even less safe, +// but more efficient. +// +// ReduceSafety has no effect when the race detector is enabled: +// the available safety checks are always enabled under the race detector, +// and will generally produce clearer diagnostics. +func ReduceSafety() { + if !raceEnabled { + atomic.StoreInt32(&safetyReducedFlag, 1) + } +} + +var safetyReducedFlag int32 = 0 + +func safetyReduced() bool { + return atomic.LoadInt32(&safetyReducedFlag) != 0 +} diff --git a/unsafeslice_test.go b/unsafeslice_test.go index 3a9f90d..b42db33 100644 --- a/unsafeslice_test.go +++ b/unsafeslice_test.go @@ -1,14 +1,25 @@ // Copyright 2020 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. + package unsafeslice_test import ( + "bytes" "fmt" + "hash" + "hash/fnv" + "io" + "os" + "os/exec" + "reflect" + "runtime" + "strings" "testing" "unsafe" "github.com/bcmills/unsafeslice" + "github.com/bcmills/unsafeslice/internal/eventually" ) // asCPointer returns b as a C-style pointer and length @@ -82,7 +93,7 @@ func TestConvertAtErrors(t *testing.T) { }{ { desc: "incompatible capacity", - src: []byte("foobar")[:4], + src: []byte("foobar")[:4:6], dst: new([]uint32), }, { @@ -105,3 +116,99 @@ func TestConvertAtErrors(t *testing.T) { }) } } + +func ExampleOfString() { + s := "Hello, world!" + + // Temporarily view the string s as a slice, + // so that we can compute its checksum with an arbitrary hash.Hash + // implementation without needing to copy it. + var h hash.Hash = fnv.New64a() + b := unsafeslice.OfString(s) + + // This is safe because the contract for an io.Writer requires: + // > Write must not modify the slice data, even temporarily. + h.Write(b) + + fmt.Printf("%x\n", h.Sum(nil)) + + // Output: + // 38d1334144987bf4 +} + +func ExampleToString() { + const input = "Hello, world!" + h := fnv.New64a() + io.WriteString(h, input) + + // Save the computed checksum as an immutable string, + // without incurring any additional allocations or copying + // (beyond the slice for the Sum output). + binaryChecksum := unsafeslice.AsString(h.Sum(nil)) + + fmt.Printf("%x\n", binaryChecksum) + + // Output: + // 38d1334144987bf4 +} + +func TestStringMutations(t *testing.T) { + if os.Getenv("UNSAFESLICE_TEST_STRING_MUTATIONS") != "" { + // Block "eventually" finalizers from running until we have actually mutated + // the slices. This guarantees that the finalizer will detect the mutation, + // even if the scheduler and collector are as antagonistic as possible. + unblock := eventually.Block() + + t.Run("AsString", func(t *testing.T) { + b := []byte("Hello, world!") + _ = unsafeslice.AsString(b) + copy(b, "Kaboom") + }) + + t.Run("OfString", func(t *testing.T) { + // Construct a string backed by mutable memory, but avoid + // unsafeslice.AsString so that we don't accidentally trigger its mutation + // check instead. + // (This test is not interesting if the attempt at mutating the string + // faults immediately or fails for reasons unrelated to OfString.) + buf := []byte("Hello, world!") + var s string + hdr := (*reflect.StringHeader)(unsafe.Pointer(&s)) + hdr.Data = uintptr(unsafe.Pointer(&buf[0])) + hdr.Len = len(buf) + + b := unsafeslice.OfString(s) + copy(b, "Kaboom") + }) + + unblock() + var waste []*uint64 + for { + runtime.GC() + waste = append(waste, new(uint64)) // Allocate garbage to attempt to force finalizers to run. + } + runtime.KeepAlive(waste) + } + + runSubtestProcess := func(t *testing.T) { + t.Parallel() + + cmd := exec.Command(os.Args[0], "-test.run="+t.Name(), "-test.v") + cmd.Env = append(os.Environ(), "UNSAFESLICE_TEST_STRING_MUTATIONS=1") + out := new(bytes.Buffer) + cmd.Stdout = out + cmd.Stderr = out + if err := cmd.Start(); err != nil { + t.Fatal(err) + } + + err := cmd.Wait() + t.Logf("%s:\n%s", strings.Join(cmd.Args, " "), out) + if err == nil { + t.Errorf("Test subprocess passed; want a crash due to detected mutations.") + } + } + + t.Run("AsString", runSubtestProcess) + t.Run("OfString", runSubtestProcess) +}