Skip to content

Commit

Permalink
cmd/compile: regalloc: handle desired registers of 2-output insns
Browse files Browse the repository at this point in the history
Particularly with 2-word load instructions, this becomes important.
Classic example is:

    func f(p *string) string {
        return *p
    }

We want the two loads to put the return values directly into
the two ABI return registers.

At this point in the stack, cmd/go is 1.1% smaller.

Change-Id: I51fd1710238e81d15aab2bfb816d73c8e7c207b1
Reviewed-on: https://go-review.googlesource.com/c/go/+/631137
Reviewed-by: David Chase <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
  • Loading branch information
randall77 committed Feb 13, 2025
1 parent 20d7c57 commit a0029e9
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 1 deletion.
21 changes: 20 additions & 1 deletion src/cmd/compile/internal/ssa/regalloc.go
Original file line number Diff line number Diff line change
Expand Up @@ -930,6 +930,7 @@ func (s *regAllocState) regalloc(f *Func) {

// Data structure used for computing desired registers.
var desired desiredState
desiredSecondReg := map[ID][4]register{} // desired register allocation for 2nd part of a tuple

// Desired registers for inputs & outputs for each instruction in the block.
type dentry struct {
Expand All @@ -949,6 +950,7 @@ func (s *regAllocState) regalloc(f *Func) {
s.curBlock = b
s.startRegsMask = 0
s.usedSinceBlockStart = 0
clear(desiredSecondReg)

// Initialize regValLiveSet and uses fields for this block.
// Walk backwards through the block doing liveness analysis.
Expand Down Expand Up @@ -1346,6 +1348,11 @@ func (s *regAllocState) regalloc(f *Func) {
}
dinfo[i].in[j] = desired.get(a.ID)
}
if v.Op == OpSelect1 && prefs[0] != noRegister {
// Save desired registers of select1 for
// use by the tuple generating instruction.
desiredSecondReg[v.Args[0].ID] = prefs
}
}

// Process all the non-phi values.
Expand Down Expand Up @@ -1748,6 +1755,17 @@ func (s *regAllocState) regalloc(f *Func) {
}
}
}
if out.idx == 1 {
if prefs, ok := desiredSecondReg[v.ID]; ok {
for _, r := range prefs {
if r != noRegister && (mask&^s.used)>>r&1 != 0 {
// Desired register is allowed and unused.
mask = regMask(1) << r
break
}
}
}
}
// Avoid registers we're saving for other values.
if mask&^desired.avoid&^s.nospill&^s.used != 0 {
mask &^= desired.avoid
Expand Down Expand Up @@ -2874,7 +2892,8 @@ type desiredStateEntry struct {
// Registers it would like to be in, in priority order.
// Unused slots are filled with noRegister.
// For opcodes that return tuples, we track desired registers only
// for the first element of the tuple.
// for the first element of the tuple (see desiredSecondReg for
// tracking the desired registers for the second part of a tuple).
regs [4]register
}

Expand Down
10 changes: 10 additions & 0 deletions test/codegen/memcombine.go
Original file line number Diff line number Diff line change
Expand Up @@ -1004,6 +1004,16 @@ func dwloadArg(a [2]int64) int64 {
return a[0] + a[1]
}

// dwloadResult1 returns the string that p points to.
//
// Codegen check: on arm64 the load of *p is expected to be a single LDP
// from (R0) whose two destination registers are R0 and R1, i.e. the two
// loaded words land directly in the registers used to return the result
// rather than being loaded elsewhere and moved afterwards.
// NOTE(review): assumes R0/R1 are the first two arm64 ABI result
// registers and that a string occupies two words — confirm against the
// internal ABI documentation.
func dwloadResult1(p *string) string {
// arm64:"LDP\t\\(R0\\), \\(R0, R1\\)"
return *p
}

// dwloadResult2 returns the two elements of *p in swapped order:
// p[1] first, then p[0].
//
// Codegen check: on arm64 the two element loads are expected to be a
// single LDP from (R0) with destination pair (R1, R0). The destination
// registers are listed in the opposite order from dwloadResult1, matching
// the swapped result order, so no register moves should be needed after
// the load.
// NOTE(review): assumes R0 and R1 hold the first and second results
// respectively under the arm64 register ABI — confirm.
func dwloadResult2(p *[2]int64) (int64, int64) {
// arm64:"LDP\t\\(R0\\), \\(R1, R0\\)"
return p[1], p[0]
}

// ---------------------------------- //
// Arm64 double-register stores //
// ---------------------------------- //
Expand Down

0 comments on commit a0029e9

Please sign in to comment.