diff --git a/cmd/goat-check/main.go b/cmd/goat-check/main.go
new file mode 100644
index 0000000..92992a3
--- /dev/null
+++ b/cmd/goat-check/main.go
@@ -0,0 +1,157 @@
+package main
+
+import (
+	"errors"
+	"flag"
+	"fmt"
+	"io"
+	"os"
+	"sync"
+	"time"
+
+	"github.com/mknyszek/goat"
+	"github.com/mknyszek/goat/simulation/toolbox"
+
+	"golang.org/x/exp/mmap"
+)
+
+var printFlag = flag.Bool("print", false, "print events as they're seen")
+
+func init() {
+	flag.Usage = func() {
+		fmt.Fprintf(flag.CommandLine.Output(), "Usage of %s:\n", os.Args[0])
+		fmt.Fprintf(flag.CommandLine.Output(), "Utility that sanity-checks Go allocation traces\n")
+		fmt.Fprintf(flag.CommandLine.Output(), "and prints some statistics.\n")
+		fmt.Fprintf(flag.CommandLine.Output(), "usage: %s [flags] <trace>\n", os.Args[0])
+		flag.PrintDefaults()
+	}
+}
+
+func handleError(err error, usage bool) {
+	fmt.Fprintf(os.Stderr, "error: %v\n", err)
+	if usage {
+		flag.Usage()
+	}
+	os.Exit(1)
+}
+
+func main() {
+	flag.Parse()
+	if flag.NArg() != 1 {
+		handleError(errors.New("incorrect number of arguments"), true)
+	}
+	r, err := mmap.Open(flag.Arg(0))
+	if err != nil {
+		handleError(fmt.Errorf("opening trace file: %v", err), false)
+	}
+	defer r.Close()
+	fmt.Println("Generating parser...")
+	p, err := goat.NewParser(r)
+	if err != nil {
+		handleError(fmt.Errorf("creating parser: %v", err), false)
+	}
+	fmt.Println("Parsing events...")
+
+	var pMu sync.Mutex
+	progressDone := make(chan struct{})
+	go func() {
+		for {
+			pMu.Lock()
+			prog := p.Progress() * 100.0
+			pMu.Unlock()
+
+			fmt.Printf("Processing... %.4f%%\r", prog)
+			select {
+			case <-progressDone:
+				fmt.Println()
+				close(progressDone)
+				return
+			case <-time.After(time.Second):
+			}
+		}
+	}()
+
+	const maxErrors = 20
+	allocs, frees, gcs := 0, 0, 0
+	var sanity toolbox.AddressSet
+	var reuseWithoutFree []goat.Event
+	var doubleFree []goat.Event
+	minTicks := ^uint64(0)
+	for {
+		pMu.Lock()
+		ev, err := p.Next()
+		pMu.Unlock()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			handleError(fmt.Errorf("parsing events: %v", err), false)
+		}
+		if minTicks == ^uint64(0) {
+			minTicks = ev.Timestamp
+		}
+		switch ev.Kind {
+		case goat.EventAlloc:
+			if *printFlag {
+				fmt.Printf("[%d P %d] alloc(%d) @ 0x%x\n", ev.Timestamp-minTicks, ev.P, ev.Size, ev.Address)
+			}
+			if ok := sanity.Add(ev.Address); !ok {
+				reuseWithoutFree = append(reuseWithoutFree, ev)
+			}
+			allocs++
+		case goat.EventFree:
+			if *printFlag {
+				fmt.Printf("[%d P %d] free @ 0x%x\n", ev.Timestamp-minTicks, ev.P, ev.Address)
+			}
+			if ok := sanity.Remove(ev.Address); !ok {
+				doubleFree = append(doubleFree, ev)
+			}
+			frees++
+		case goat.EventGCEnd:
+			if *printFlag {
+				fmt.Printf("[%d P %d] GC end\n", ev.Timestamp-minTicks, ev.P)
+			}
+			gcs++
+		}
+		if len(reuseWithoutFree)+len(doubleFree) > maxErrors {
+			break
+		}
+	}
+	progressDone <- struct{}{}
+	<-progressDone
+
+	if errcount := len(reuseWithoutFree) + len(doubleFree); errcount != 0 {
+		tooMany := errcount > maxErrors
+		if tooMany {
+			errcount = maxErrors
+			fmt.Fprintf(os.Stderr, "found >%d errors in trace:\n", maxErrors)
+		} else {
+			fmt.Fprintf(os.Stderr, "found %d errors in trace:\n", errcount)
+		}
+		for i := 0; i < errcount; i++ {
+			ts1, ts2 := ^uint64(0), ^uint64(0)
+			var e1, e2 *goat.Event
+			if len(reuseWithoutFree) != 0 {
+				ts1 = reuseWithoutFree[0].Timestamp
+				e1 = &reuseWithoutFree[0]
+			}
+			if len(doubleFree) != 0 {
+				ts2 = doubleFree[0].Timestamp
+				e2 = &doubleFree[0]
+			}
+			if ts1 < ts2 {
+				fmt.Fprintf(os.Stderr, "  allocated over slot 0x%x\n", e1.Address)
+				reuseWithoutFree = reuseWithoutFree[1:]
+			} else {
+				fmt.Fprintf(os.Stderr, "  freed free slot 0x%x\n", e2.Address)
+				doubleFree = doubleFree[1:]
+			}
+		}
+		if tooMany {
+			fmt.Fprintf(os.Stderr, "too many errors\n")
+		}
+	}
+	fmt.Printf("Allocs: %d\n", allocs)
+	fmt.Printf("Frees: %d\n", frees)
+	fmt.Printf("GCs: %d\n", gcs)
+}
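A note on the sanity check above: the two error classes the checker reports fall directly out of toolbox.AddressSet's boolean results. The standalone program below is a minimal sketch (hypothetical, not part of this change) of that mapping:

package main

import (
	"fmt"

	"github.com/mknyszek/goat/simulation/toolbox"
)

func main() {
	var set toolbox.AddressSet

	fmt.Println(set.Add(0xc000100000))    // true: fresh address
	fmt.Println(set.Add(0xc000100000))    // false: "allocated over slot" (reuse without free)
	fmt.Println(set.Remove(0xc000100000)) // true: address was live
	fmt.Println(set.Remove(0xc000100000)) // false: "freed free slot" (double free)
}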
diff --git a/event.go b/event.go
new file mode 100644
index 0000000..f0ed193
--- /dev/null
+++ b/event.go
@@ -0,0 +1,39 @@
+package goat
+
+// EventKind indicates what kind of allocation trace event
+// is captured and returned.
+type EventKind uint8
+
+const (
+	EventBad   EventKind = iota
+	EventAlloc // Allocation.
+	EventFree  // Free.
+	EventGCEnd // GC mark termination.
+)
+
+// Event represents a single allocation trace event.
+type Event struct {
+	// Timestamp is the time in non-normalized CPU ticks
+	// for this event.
+	Timestamp uint64
+
+	// Address is the address for the allocation or free.
+	// Only valid when Kind == EventAlloc or Kind == EventFree.
+	Address uint64
+
+	// Size indicates the size of the allocation.
+	// Only valid when Kind == EventAlloc.
+	Size uint64
+
+	// P indicates which processor generated the event.
+	// Valid for all events.
+	P int32
+
+	// Array indicates whether an allocation was for
+	// an array type. Only valid when Kind == EventAlloc.
+	Array bool
+
+	// Kind indicates what kind of event this is.
+	// It is valid for every event.
+	Kind EventKind
+}
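For reference, here is a hypothetical helper (a sketch, not part of this change) that renders an Event while reading only the fields the comments above declare valid for each Kind; it assumes `import "fmt"` in package goat:

// String formats an Event according to its Kind.
func (e Event) String() string {
	switch e.Kind {
	case EventAlloc:
		return fmt.Sprintf("[%d P %d] alloc(%d, array=%t) @ 0x%x",
			e.Timestamp, e.P, e.Size, e.Array, e.Address)
	case EventFree:
		return fmt.Sprintf("[%d P %d] free @ 0x%x", e.Timestamp, e.P, e.Address)
	case EventGCEnd:
		return fmt.Sprintf("[%d P %d] GC end", e.Timestamp, e.P)
	}
	return fmt.Sprintf("[%d P %d] bad event", e.Timestamp, e.P)
}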
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..ba8b427
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,8 @@
+module github.com/mknyszek/goat
+
+go 1.14
+
+require (
+	golang.org/x/exp v0.0.0-20200331195152-e8c3332aa8e5
+	golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a
+)
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..ff01955
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,25 @@
+dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
+github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
+github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20200331195152-e8c3332aa8e5 h1:FR+oGxGfbQu1d+jglI3rCkjAjUnhRSZcUxr+DqlDLNo=
+golang.org/x/exp v0.0.0-20200331195152-e8c3332aa8e5/go.mod h1:4M0jN8W1tt0AVLNr8HDosyJCDCDuyL9N9+3m7wDWgKw=
+golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
+golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o=
+golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
+golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a h1:WXEvlFVvvGxCJLG6REjsT03iWnKLEWinaScsxF2Vm2o=
+golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
+golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
diff --git a/parse.go b/parse.go
new file mode 100644
index 0000000..f82ba40
--- /dev/null
+++ b/parse.go
@@ -0,0 +1,528 @@
+package goat
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"math/bits"
+	"runtime"
+	"sort"
+	"sync"
+
+	"golang.org/x/sync/errgroup"
+)
+
+const batchSize = 32 << 10
+
+// Parser contains the Go allocation trace parsing
+// state.
+type Parser struct {
+	src          Source
+	index        [][]batchOffset
+	batches      []batchReader
+	totalBatches uint64
+}
+
+// Source is an allocation trace source.
+type Source interface {
+	io.ReaderAt
+
+	// Len returns the size of the allocation
+	// trace in bytes.
+	Len() int
+}
+
+type batchOffset struct {
+	startTicks uint64
+	fileOffset int64
+}
+
+// headerSize returns the encoded size in bytes of this batch's
+// header: three bytes of fixed overhead plus the varint-encoded
+// start tick.
+func (b batchOffset) headerSize() uint64 {
+	return 3 + uint64(bits.Len64(b.startTicks)+6)/7
+}
+
+const (
+	atEvBad uint8 = iota
+	atEvSpanAcquire
+	atEvAlloc
+	atEvAllocArray
+	atEvAllocLarge
+	atEvAllocLargeNoscan
+	atEvAllocLargeArray
+	atEvAllocLargeArrayNoscan
+	atEvSpanRelease
+	atEvSweep
+	atEvFree
+	atEvMarkTerm
+	atEvSync
+	atEvBatchStart
+	atEvBatchEnd
+)
+
+// parseVarint decodes a LEB128-style varint from the start of buf,
+// returning the number of bytes consumed and the decoded value.
+func parseVarint(buf []byte) (int, uint64, error) {
+	result := uint64(0)
+	shift := uint(0)
+	for i := 0; ; i++ {
+		if i >= len(buf) {
+			return 0, 0, fmt.Errorf("not enough bytes left to decode varint")
+		}
+		result |= uint64(buf[i]&0x7f) << shift
+		if buf[i]&(1<<7) == 0 {
+			return i + 1, result, nil
+		}
+		shift += 7
+		if shift >= 64 {
+			return 0, 0, fmt.Errorf("varint too long")
+		}
+	}
+}
+
+// parseBatchHeader parses the fixed prefix of a batch: a batch start
+// event carrying the P that wrote the batch, followed by a sync event
+// carrying the batch's starting tick.
+func parseBatchHeader(buf []byte) (int32, uint64, error) {
+	idx := 0
+	if buf[idx] != atEvBatchStart {
+		return 0, 0, fmt.Errorf("expected batch start event")
+	}
+	idx++
+
+	n, pid, err := parseVarint(buf[idx:])
+	if err != nil {
+		return 0, 0, err
+	}
+	idx += n
+
+	if buf[idx] != atEvSync {
+		return 0, 0, fmt.Errorf("expected sync event")
+	}
+	idx++
+
+	_, ticks, err := parseVarint(buf[idx:])
+	if err != nil {
+		return 0, 0, err
+	}
+	return int32(pid), ticks, nil
+}
+
+const headerSize = 4
+
+// supportedVersion is the Go version whose traces this package
+// understands, encoded as major<<8 | minor (here, Go 1.14).
+const supportedVersion uint16 = (uint16(1) << 8) | 14
+
+// parseHeader reads the file header and returns the trace version
+// in the same major<<8 | minor encoding.
+func parseHeader(r Source) (uint16, error) {
+	var header [headerSize]byte
+	n, err := r.ReadAt(header[:], 0)
+	if n != headerSize || err != nil {
+		return 0, err
+	}
+	version := uint16(header[2])<<8 | uint16(header[3])
+	return version, nil
+}
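To make the wire format concrete: parseVarint decodes the LEB128-style encoding used throughout a batch, 7 bits of payload per byte, least-significant group first, with the high bit set on every byte except the last. A test-style sketch (hypothetical; assumes it lives in package goat with "testing" imported):

func TestParseVarint(t *testing.T) {
	// 0xe5 = 1_1100101: payload 0x65, continuation bit set.
	// 0x8e = 1_0001110: payload 0x0e, continuation bit set.
	// 0x26 = 0_0100110: payload 0x26, final byte.
	n, v, err := parseVarint([]byte{0xe5, 0x8e, 0x26})
	if err != nil || n != 3 || v != 0x65|0x0e<<7|0x26<<14 { // 624485
		t.Fatalf("got n=%d v=%d err=%v", n, v, err)
	}
}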
+// NewParser creates and initializes a new Parser given a Source.
+//
+// Initialization may involve ordering the trace, which may be
+// computationally expensive.
+//
+// NewParser fails if this initialization, which can require
+// parsing part or all of the trace, fails.
+func NewParser(r Source) (*Parser, error) {
+	// Check some basic properties, like the size and the header.
+	if r.Len()%batchSize != headerSize {
+		return nil, fmt.Errorf("bad format: file must be a %d-byte header followed by a multiple of %d bytes", headerSize, batchSize)
+	}
+	version, err := parseHeader(r)
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse header: %v", err)
+	}
+	if version != supportedVersion {
+		return nil, fmt.Errorf("unsupported version %d.%d", version>>8, version&0xff)
+	}
+
+	// Figure out how to break up the initialization phase.
+	shards := runtime.GOMAXPROCS(-1)
+	numBatches := (r.Len() - headerSize) / batchSize
+	if shards > numBatches {
+		// Not enough work to be worth sharding.
+		shards = 1
+	}
+	batchesPerShard := numBatches / shards
+	if numBatches%shards != 0 {
+		// Overshoot slightly so the shards cover all batches; each
+		// shard's end is clamped to numBatches below.
+		batchesPerShard = numBatches / (shards - 1)
+	}
+
+	// Build up a per-shard index.
+	perShardIndex := make([][][]batchOffset, shards)
+	var eg errgroup.Group
+	for i := 0; i < shards; i++ {
+		i := i
+		eg.Go(func() error {
+			const bufSize = 16
+			var buf [bufSize]byte
+
+			// Generate the index for this shard.
+			index := make([][]batchOffset, 16)
+			start := int64(batchesPerShard * i)
+			end := int64(batchesPerShard * (i + 1))
+			if end > int64(numBatches) {
+				end = int64(numBatches)
+			}
+			for idx := start*batchSize + headerSize; idx < end*batchSize+headerSize; idx += batchSize {
+				n, err := r.ReadAt(buf[:], idx)
+				if n < bufSize {
+					return err
+				}
+				pid, ticks, err := parseBatchHeader(buf[:])
+				if err != nil {
+					return err
+				}
+				if int(pid) >= len(index) {
+					index = append(index, make([][]batchOffset, int(pid)-len(index)+1)...)
+				}
+				index[pid] = append(index[pid], batchOffset{
+					startTicks: ticks,
+					fileOffset: idx,
+				})
+			}
+			// For each P, sort the batches in the index.
+			for pid := range index {
+				sort.Slice(index[pid], func(i, j int) bool {
+					return index[pid][i].startTicks < index[pid][j].startTicks
+				})
+			}
+			perShardIndex[i] = index
+			return nil
+		})
+	}
+	if err := eg.Wait(); err != nil {
+		return nil, err
+	}
+
+	// Count the maximum number of Ps we need to account for.
+	// Note that this may be more than the number of Ps actually
+	// represented in the trace.
+	maxP := 0
+	for i := range perShardIndex {
+		if ps := len(perShardIndex[i]); ps > maxP {
+			maxP = ps
+		}
+	}
+
+	// Count up how many batches there are for each P.
+	perPidBatches := make([]int, maxP)
+	for pid := range perPidBatches {
+		for i := 0; i < shards; i++ {
+			if pid < len(perShardIndex[i]) {
+				perPidBatches[pid] += len(perShardIndex[i][pid])
+			}
+		}
+	}
+
+	// Merge the per-shard indices into one index, parallelizing
+	// across Ps. Each worker repeatedly takes, for its P, the head
+	// batch with the smallest start tick across all shards: a
+	// simple k-way merge.
+	index := make([][]batchOffset, maxP)
+	pidChan := make(chan int, shards)
+	var wg sync.WaitGroup
+	for i := 0; i < shards; i++ {
+		go func() {
+			for {
+				pid, ok := <-pidChan
+				if !ok {
+					return
+				}
+				for len(index[pid]) < perPidBatches[pid] {
+					// Find the shard whose head batch for this P
+					// has the smallest start tick.
+					minBatch := batchOffset{startTicks: ^uint64(0)}
+					minShard := -1
+					for i := 0; i < shards; i++ {
+						if pid < len(perShardIndex[i]) && len(perShardIndex[i][pid]) > 0 && perShardIndex[i][pid][0].startTicks < minBatch.startTicks {
+							minBatch = perShardIndex[i][pid][0]
+							minShard = i
+						}
+					}
+					perShardIndex[minShard][pid] = perShardIndex[minShard][pid][1:]
+					index[pid] = append(index[pid], minBatch)
+				}
+				wg.Done()
+			}
+		}()
+	}
+	for pid := range index {
+		if perPidBatches[pid] != 0 {
+			wg.Add(1)
+			pidChan <- pid
+		}
+	}
+	wg.Wait()
+	close(pidChan)
+
+	p := &Parser{
+		src:          r,
+		index:        index,
+		batches:      make([]batchReader, maxP),
+		totalBatches: uint64(r.Len()-headerSize) / batchSize,
+	}
+	for pid := range index {
+		if _, err := p.next(pid); err != nil {
+			return nil, fmt.Errorf("initializing parser: %v", err)
+		}
+	}
+	return p, nil
+}
+
+var doneEvent = Event{Timestamp: ^uint64(0)}
+var streamEnd = errors.New("stream end")
+
+type batchReader struct {
+	next       Event
+	syncTick   uint64
+	allocBase  [^uint8(0)]uint64
+	freeBase   uint64
+	sweepStart uint64
+	readBuf    []byte
+	batchBuf   [batchSize]byte
+}
+
+func (b *batchReader) nextEvent() error {
+	if len(b.readBuf) == 0 {
+		return streamEnd
+	}
+	haveEvent := false
+	b.next = Event{}
+	for !haveEvent {
+		size := 1
+		switch evKind := b.readBuf[0]; evKind {
+		case atEvSpanAcquire:
+			// Parse class.
+			class := b.readBuf[size]
+			size += 1
+
+			// Parse base address.
+			n, base, err := parseVarint(b.readBuf[size:])
+			if err != nil {
+				return fmt.Errorf("parsing span base: %v", err)
+			}
+			size += n
+			b.allocBase[class] = base
+		case atEvAllocArray:
+			b.next.Array = true
+			fallthrough
+		case atEvAlloc:
+			haveEvent = true
+			b.next.Kind = EventAlloc
+
+			// Parse class for alloc event.
+			class := b.readBuf[size]
+			size += 1
+
+			// Parse offset for alloc event.
+			n, allocOffset, err := parseVarint(b.readBuf[size:])
+			if err != nil {
+				return fmt.Errorf("parsing offset for alloc: %v", err)
+			}
+			size += n
+
+			// Parse size for alloc event.
+			n, allocSizeDiff, err := parseVarint(b.readBuf[size:])
+			if err != nil {
+				return fmt.Errorf("parsing size for alloc: %v", err)
+			}
+			size += n
+
+			// Parse tick delta for alloc event.
+			n, tickDelta, err := parseVarint(b.readBuf[size:])
+			if err != nil {
+				return fmt.Errorf("parsing tick delta for alloc: %v", err)
+			}
+			size += n
+
+			if class >= 2 && b.allocBase[class] == 0 {
+				return fmt.Errorf("allocation from unacquired span class %d", class)
+			}
+			b.next.Timestamp = b.syncTick + tickDelta
+			b.next.Address = b.allocBase[class] + allocOffset
+			b.next.Size = classToSize(class) - allocSizeDiff
+		case atEvAllocLargeArrayNoscan:
+			fallthrough
+		case atEvAllocLargeArray:
+			b.next.Array = true
+			fallthrough
+		case atEvAllocLargeNoscan:
+			fallthrough
+		case atEvAllocLarge:
+			haveEvent = true
+			b.next.Kind = EventAlloc
+
+			// Parse address for alloc event.
+			n, addr, err := parseVarint(b.readBuf[size:])
+			if err != nil {
+				return fmt.Errorf("parsing address for large alloc: %v", err)
+			}
+			size += n
+
+			// Parse size for alloc event.
+			n, allocSize, err := parseVarint(b.readBuf[size:])
+			if err != nil {
+				return fmt.Errorf("parsing size for large alloc: %v", err)
+			}
+			size += n
+
+			// Parse tick delta for alloc event.
+			n, tickDelta, err := parseVarint(b.readBuf[size:])
+			if err != nil {
+				return fmt.Errorf("parsing tick delta for alloc: %v", err)
+			}
+			size += n
+
+			b.next.Timestamp = b.syncTick + tickDelta
+			b.next.Address = addr
+			b.next.Size = allocSize
+		case atEvSpanRelease:
+			// Parse class.
+			class := b.readBuf[size]
+			size += 1
+
+			if b.allocBase[class] == 0 {
+				return fmt.Errorf("release of unacquired span class %d", class)
+			}
+			b.allocBase[class] = 0
+		case atEvSweep:
+			// Parse tick delta for sweep event.
+			n, tickDelta, err := parseVarint(b.readBuf[size:])
+			if err != nil {
+				return fmt.Errorf("parsing tick delta for sweep: %v", err)
+			}
+			size += n
+			b.sweepStart = b.syncTick + tickDelta
+
+			// Parse base address for sweep event.
+			n, base, err := parseVarint(b.readBuf[size:])
+			if err != nil {
+				return fmt.Errorf("parsing base address for sweep: %v", err)
+			}
+			size += n
+			b.freeBase = base
+		case atEvFree:
+			haveEvent = true
+			b.next.Kind = EventFree
+
+			// Parse offset for free event.
+			n, freeOffset, err := parseVarint(b.readBuf[size:])
+			if err != nil {
+				return fmt.Errorf("parsing offset for free: %v", err)
+			}
+			size += n
+
+			b.next.Timestamp = b.sweepStart
+			b.next.Address = b.freeBase + freeOffset
+		case atEvMarkTerm:
+			haveEvent = true
+			b.next.Kind = EventGCEnd
+
+			n, tickDelta, err := parseVarint(b.readBuf[size:])
+			if err != nil {
+				return fmt.Errorf("parsing mark termination event timestamp: %v", err)
+			}
+			size += n
+
+			b.next.Timestamp = b.syncTick + tickDelta
+		case atEvSync:
+			n, ticks, err := parseVarint(b.readBuf[size:])
+			if err != nil {
+				return fmt.Errorf("parsing sync event timestamp: %v", err)
+			}
+			size += n
+			b.syncTick = ticks
+		case atEvBatchEnd:
+			return streamEnd
+		case atEvBatchStart:
+			return fmt.Errorf("unexpected header found")
+		default:
+			return fmt.Errorf("unknown event type %d", evKind)
+		}
+		b.readBuf = b.readBuf[size:]
+	}
+	return nil
+}
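As a worked example of the small-object encoding handled above (hypothetical values): suppose a prior atEvSpanAcquire set allocBase[10] = 0xc000100000 and the current syncTick is 1000. Then five bytes decode as follows (every varint here fits in one byte):

// Hypothetical bytes within a batch:
//
//   evKind    = atEvAlloc  // small-object allocation
//   class     = 10         // span class 10 = size class 5 (64 bytes)
//   offset    = 0x40       // 64 bytes into the span
//   sizeDiff  = 0x08       // object is 64-8 = 56 bytes
//   tickDelta = 0x10       // 16 ticks after the last sync
//
// which nextEvent turns into:
//
//   Event{
//       Kind:      EventAlloc,
//       Timestamp: 1000 + 16,
//       Address:   0xc000100000 + 0x40,
//       Size:      56,
//   }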
+
+func (p *Parser) peek(pid int) uint64 {
+	return p.batches[pid].next.Timestamp
+}
+
+func (p *Parser) refill(pid int) error {
+	// If we're out of batches, just mark
+	// this P as done.
+	if len(p.index[pid]) == 0 {
+		p.batches[pid].next = doneEvent
+		return nil
+	}
+	// Grab the next batch for this P.
+	bo := p.index[pid][0]
+	p.index[pid] = p.index[pid][1:]
+
+	// Read in the batch.
+	br := &p.batches[pid]
+	n, err := p.src.ReadAt(br.batchBuf[:], bo.fileOffset)
+	if n != len(br.batchBuf) {
+		return err
+	}
+
+	// Skip the header.
+	br.readBuf = br.batchBuf[bo.headerSize():]
+
+	// Set the sync event tick for this batch,
+	// which was present in the header.
+	br.syncTick = bo.startTicks
+
+	// Read the next event.
+	if err := br.nextEvent(); err != nil && err != streamEnd {
+		return fmt.Errorf("refill: P %d: %v", pid, err)
+	}
+	return nil
+}
+
+func (p *Parser) next(pid int) (Event, error) {
+	// Grab the current event first.
+	ev := p.batches[pid].next
+	ev.P = int32(pid)
+
+	// Get the next event.
+	if err := p.batches[pid].nextEvent(); err != nil && err != streamEnd {
+		return Event{}, fmt.Errorf("P %d: %v", pid, err)
+	} else if err == streamEnd {
+		// We've run out of things to parse for this P! Refill.
+		if err := p.refill(pid); err != nil {
+			return Event{}, err
+		}
+	}
+	return ev, nil
+}
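One consequence of this design worth spelling out: each per-P batch list is sorted by start tick and next/refill advance one stream at a time, so Next (below) can produce a globally ordered stream just by taking the minimum peeked timestamp across Ps. A hypothetical harness over a *goat.Parser p (assuming "io" and "log" are imported) that checks the intended invariant, modulo any tick skew in the source trace:

last := uint64(0)
for {
	ev, err := p.Next()
	if err == io.EOF {
		break
	}
	if err != nil {
		log.Fatalf("parsing: %v", err)
	}
	if ev.Timestamp < last {
		log.Fatalf("events out of order: %d after %d", ev.Timestamp, last)
	}
	last = ev.Timestamp
}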
+
+// Progress returns a float64 value between 0 and 1 indicating the
+// approximate progress of parsing through the file.
+func (p *Parser) Progress() float64 {
+	left := uint64(0)
+	for _, perPBatches := range p.index {
+		left += uint64(len(perPBatches))
+	}
+	return float64(p.totalBatches-left) / float64(p.totalBatches)
+}
+
+// Next returns the next event in the trace, or an error
+// if the parser failed to parse the next event out of the trace.
+func (p *Parser) Next() (Event, error) {
+	// Compute which P has the next event.
+	minPid := -1
+	minTick := ^uint64(0)
+	for pid := range p.batches {
+		if t := p.peek(pid); t < minTick {
+			minTick = t
+			minPid = pid
+		}
+	}
+
+	// If there's no such event, signal that we're done.
+	if minPid < 0 {
+		return Event{}, io.EOF
+	}
+
+	// Return the event, and compute the next.
+	return p.next(minPid)
+}
diff --git a/simulation/toolbox/address_set.go b/simulation/toolbox/address_set.go
new file mode 100644
index 0000000..fad9410
--- /dev/null
+++ b/simulation/toolbox/address_set.go
@@ -0,0 +1,65 @@
+package toolbox
+
+// AddressSet is a set of addresses laid out for efficient
+// memory use and access.
+type AddressSet struct {
+	// m is a 4-level radix structure, consuming 16 bits of
+	// the address per level. The bottom level is a bitmap
+	// with one bit per byte of address space.
+	m [1 << 16]*[1 << 16]*[1 << 16]*[(1 << 16) / 8]uint8
+}
+
+// Add adds a new address to the AddressSet.
+//
+// It returns true on success, that is, if the address
+// was not already present in the set.
+func (a *AddressSet) Add(addr uint64) bool {
+	l1 := &a.m[addr>>48]
+	if *l1 == nil {
+		*l1 = new([1 << 16]*[1 << 16]*[(1 << 16) / 8]uint8)
+	}
+	l2 := &((*l1)[(addr>>32)&0xffff])
+	if *l2 == nil {
+		*l2 = new([1 << 16]*[(1 << 16) / 8]uint8)
+	}
+	l3 := &((*l2)[(addr>>16)&0xffff])
+	if *l3 == nil {
+		*l3 = new([(1 << 16) / 8]uint8)
+	}
+	c := *l3
+	i := addr & 0xffff
+	mask := uint8(1) << (i % 8)
+	idx := i / 8
+	if c[idx]&mask != 0 {
+		return false
+	}
+	c[idx] |= mask
+	return true
+}
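For intuition, the indexing Add performs decomposes an address into four 16-bit fields, one per level. A hypothetical helper (not part of this change) making that explicit:

// decompose splits an address the way AddressSet's levels consume it.
func decompose(addr uint64) (l1, l2, l3, bit uint64) {
	l1 = addr >> 48            // root array index
	l2 = (addr >> 32) & 0xffff // second-level index
	l3 = (addr >> 16) & 0xffff // third-level index
	bit = addr & 0xffff        // bit position in the 8 KiB leaf bitmap
	return
}

Each populated leaf covers 64 KiB of address space with an 8 KiB bitmap; regions never touched cost only nil pointers at the upper levels.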
+
+// Remove removes an address from the AddressSet.
+//
+// It returns true on success, that is, if the address
+// was present in the set.
+func (a *AddressSet) Remove(addr uint64) bool {
+	l1 := a.m[addr>>48]
+	if l1 == nil {
+		return false
+	}
+	l2 := l1[(addr>>32)&0xffff]
+	if l2 == nil {
+		return false
+	}
+	l3 := l2[(addr>>16)&0xffff]
+	if l3 == nil {
+		return false
+	}
+	i := addr & 0xffff
+	mask := uint8(1) << (i % 8)
+	idx := i / 8
+	if l3[idx]&mask == 0 {
+		return false
+	}
+	l3[idx] &^= mask
+	return true
+}
diff --git a/sizeclasses.go b/sizeclasses.go
new file mode 100644
index 0000000..1ae9277
--- /dev/null
+++ b/sizeclasses.go
@@ -0,0 +1,85 @@
+package goat
+
+// class  bytes/obj  bytes/span  objects  tail waste  max waste
+//     1          8        8192     1024           0     87.50%
+//     2         16        8192      512           0     43.75%
+//     3         32        8192      256           0     46.88%
+//     4         48        8192      170          32     31.52%
+//     5         64        8192      128           0     23.44%
+//     6         80        8192      102          32     19.07%
+//     7         96        8192       85          32     15.95%
+//     8        112        8192       73          16     13.56%
+//     9        128        8192       64           0     11.72%
+//    10        144        8192       56         128     11.82%
+//    11        160        8192       51          32      9.73%
+//    12        176        8192       46          96      9.59%
+//    13        192        8192       42         128      9.25%
+//    14        208        8192       39          80      8.12%
+//    15        224        8192       36         128      8.15%
+//    16        240        8192       34          32      6.62%
+//    17        256        8192       32           0      5.86%
+//    18        288        8192       28         128     12.16%
+//    19        320        8192       25         192     11.80%
+//    20        352        8192       23          96      9.88%
+//    21        384        8192       21         128      9.51%
+//    22        416        8192       19         288     10.71%
+//    23        448        8192       18         128      8.37%
+//    24        480        8192       17          32      6.82%
+//    25        512        8192       16           0      6.05%
+//    26        576        8192       14         128     12.33%
+//    27        640        8192       12         512     15.48%
+//    28        704        8192       11         448     13.93%
+//    29        768        8192       10         512     13.94%
+//    30        896        8192        9         128     15.52%
+//    31       1024        8192        8           0     12.40%
+//    32       1152        8192        7         128     12.41%
+//    33       1280        8192        6         512     15.55%
+//    34       1408       16384       11         896     14.00%
+//    35       1536        8192        5         512     14.00%
+//    36       1792       16384        9         256     15.57%
+//    37       2048        8192        4           0     12.45%
+//    38       2304       16384        7         256     12.46%
+//    39       2688        8192        3         128     15.59%
+//    40       3072       24576        8           0     12.47%
+//    41       3200       16384        5         384      6.22%
+//    42       3456       24576        7         384      8.83%
+//    43       4096        8192        2           0     15.60%
+//    44       4864       24576        5         256     16.65%
+//    45       5376       16384        3         256     10.92%
+//    46       6144       24576        4           0     12.48%
+//    47       6528       32768        5         128      6.23%
+//    48       6784       40960        6         256      4.36%
+//    49       6912       49152        7         768      3.37%
+//    50       8192        8192        1           0     15.61%
+//    51       9472       57344        6         512     14.28%
+//    52       9728       49152        5         512      3.64%
+//    53      10240       40960        4           0      4.99%
+//    54      10880       32768        3         128      6.24%
+//    55      12288       24576        2           0     11.45%
+//    56      13568       40960        3         256      9.99%
+//    57      14336       57344        4           0      5.35%
+//    58      16384       16384        1           0     12.49%
+//    59      18432       73728        4           0     11.11%
+//    60      19072       57344        3         128      3.57%
+//    61      20480       40960        2           0      6.87%
+//    62      21760       65536        3         256      6.25%
+//    63      24576       24576        1           0     11.45%
+//    64      27264       81920        3         128     10.00%
+//    65      28672       57344        2           0      4.91%
+//    66      32768       32768        1           0     12.50%
+
+const numSizeClasses = 67
+
+var sizeClassToSize = [numSizeClasses]uint64{
+	0, 8, 16, 32, 48, 64, 80, 96, 112, 128,
+	144, 160, 176, 192, 208, 224, 240, 256, 288, 320,
+	352, 384, 416, 448, 480, 512, 576, 640, 704, 768,
+	896, 1024, 1152, 1280, 1408, 1536, 1792, 2048, 2304, 2688,
+	3072, 3200, 3456, 4096, 4864, 5376, 6144, 6528, 6784, 6912,
+	8192, 9472, 9728, 10240, 10880, 12288, 13568, 14336, 16384, 18432,
+	19072, 20480, 21760, 24576, 27264, 28672, 32768,
+}
+
+// classToSize returns the object size for a span class as recorded
+// in the trace. The low bit of a span class is the noscan flag, so
+// shifting right by one recovers the size class.
+func classToSize(class uint8) uint64 {
+	return sizeClassToSize[class>>1]
+}
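The span-class encoding assumed by classToSize matches the Go runtime's as of Go 1.14 (spanClass = sizeclass<<1 | noscan), which is also why the trace format carries separate Noscan variants of the large-alloc events. A quick illustration with hypothetical values, inside package goat:

fmt.Println(classToSize(5 << 1))   // 64: span class 10 = size class 5, scannable
fmt.Println(classToSize(5<<1 | 1)) // 64: span class 11 = size class 5, noscan
fmt.Println(classToSize(66 << 1))  // 32768: the largest size class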