Skip to content

Commit

Permalink
Add AVX2 code path for decodeText()
Browse files Browse the repository at this point in the history
  • Loading branch information
kaiburjack committed Aug 14, 2022
1 parent 92eb276 commit 4b6ddef
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 5 deletions.
53 changes: 50 additions & 3 deletions decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,19 @@ import (
"io"
)

var canUseSSE = cpuid.CPU.Has(cpuid.SSE) && cpuid.CPU.Has(cpuid.BMI1)
// canUseSSE reports whether the CPU offers both SSE2 and BMI1; the 16-byte
// text-scanning path is only taken when this is true.
var canUseSSE = cpuid.CPU.Has(cpuid.SSE2) && cpuid.CPU.Has(cpuid.BMI1)

// canUseAVX2 reports whether, on top of the canUseSSE requirements, the CPU
// also offers AVX2; the 32-byte text-scanning path is only taken when this is
// true.
var canUseAVX2 = canUseSSE && cpuid.CPU.Has(cpuid.AVX2)

// simdWidth is the number of bytes a single SIMD load of the selected code
// path covers (32, 16 or 0). It is assigned once in init and is subtracted
// from the read buffer's capacity when slicing the destination for Read.
var simdWidth int

// init picks the SIMD load width that matches the widest instruction set the
// CPU supports: 32 bytes for AVX2, 16 bytes for SSE, and 0 when neither is
// available.
func init() {
	switch {
	case canUseAVX2:
		simdWidth = 32
	case canUseSSE:
		simdWidth = 16
	default:
		simdWidth = 0
	}
}

// Decoder decodes an XML input stream into Token values.
type Decoder interface {
Expand Down Expand Up @@ -64,7 +76,7 @@ func (thiz *decoder) read0() error {
thiz.w -= thiz.r
thiz.r = 0
}
n, err := thiz.rd.Read(thiz.rb[thiz.w : cap(thiz.rb)-16])
n, err := thiz.rd.Read(thiz.rb[thiz.w : cap(thiz.rb)-simdWidth])
thiz.w += n
if n <= 0 && err != nil {
return err
Expand Down Expand Up @@ -359,8 +371,43 @@ func (thiz *decoder) decodeTextSSE(t *Token) (bool, error) {
}
}

// decodeTextAVX2 collects character data up to the next '<' using the 32-byte
// helpers findFirstOpenAngleBracket32 and onlySpacesUntil32.
//
// It returns (true, nil) without writing to t when every byte seen before the
// '<' was classified as whitespace and thiz.preserveWhitespaces[thiz.top] is
// false. Otherwise it sets t.Kind to TokenTypeTextElement, points t.ByteData
// at the bytes this call appended to thiz.bb, and returns (false, nil).
// Errors from discard or read0 are returned as (false, err).
//
// NOTE(review): the 32-byte helpers load a full 32 bytes from the start of the
// slice they are given, even when fewer than 32 unread bytes remain before
// thiz.w. Confirm that bytes past thiz.w cannot yield a spurious '<' index or
// clear onlyWhitespaces.
func (thiz *decoder) decodeTextAVX2(t *Token) (bool, error) {
// i marks where this token's bytes start inside thiz.bb.
i := len(thiz.bb)
// onlyWhitespaces stays true across refills until a non-whitespace byte is seen.
onlyWhitespaces := true
for {
// j is the read position at the start of this pass; c counts bytes scanned past it.
j := thiz.r
c := 0
for thiz.w > thiz.r+c {
sidx := findFirstOpenAngleBracket32(thiz.rb[j+c : thiz.w])
onlyWhitespaces = onlyWhitespaces && onlySpacesUntil32(thiz.rb[j+c:thiz.w], sidx)
c += sidx
// Anything other than 32 means a '<' was located inside the current window.
if sidx != 32 {
_, err := thiz.discard(c)
if err != nil {
return false, err
}
if onlyWhitespaces && !thiz.preserveWhitespaces[thiz.top] {
return true, nil
}
// j was captured before discard, so rb[j:j+c] is the text just scanned.
thiz.bb = append(thiz.bb, thiz.rb[j:j+c]...)
t.Kind = TokenTypeTextElement
t.ByteData = thiz.bb[i:len(thiz.bb)]
return false, nil
}
}
// No '<' in the buffered data: keep what was scanned and read more input.
thiz.bb = append(thiz.bb, thiz.rb[j:thiz.w]...)
thiz.discardBuffer()
err := thiz.read0()
if err != nil {
return false, err
}
}
}

func (thiz *decoder) decodeText(t *Token) (bool, error) {
if canUseSSE {
if canUseAVX2 {
return thiz.decodeTextAVX2(t)
} else if canUseSSE {
return thiz.decodeTextSSE(t)
}
return thiz.decodeTextGeneric(t)
Expand Down
14 changes: 14 additions & 0 deletions sse_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,17 @@ func onlySpaces16([]uint8) uint16
// onlySpacesUntil16 reports whether the lowest n bits of the mask returned by
// onlySpaces16(slice) are all zero, i.e. whether none of the first n bytes was
// flagged by onlySpaces16.
func onlySpacesUntil16(slice []uint8, n int) bool {
	flagged := onlySpaces16(slice)
	// Shifting left by 16-n drops the bits for positions n..15.
	dropped := 16 - n
	return flagged<<dropped == 0
}

// openAngleBracket32 is implemented in assembly (sse_amd64.s). It loads 32
// bytes starting at the slice's data pointer, without consulting the slice
// length, and returns the index of the first byte equal to '<' (0x3C), or 32
// when none of the 32 bytes matches.
//
//go:noescape
func openAngleBracket32([]uint8) int

// findFirstOpenAngleBracket32 forwards to the assembly routine
// openAngleBracket32 and returns its result unchanged.
func findFirstOpenAngleBracket32(slice []uint8) int {
	firstIdx := openAngleBracket32(slice)
	return firstIdx
}

// onlySpaces32 is implemented in assembly (sse_amd64.s). It loads 32 bytes
// starting at the slice's data pointer, without consulting the slice length,
// and returns a bit mask in which bit i is set when byte i is greater than
// 0x20 (bytes with the high bit set are flagged as well). A zero mask
// therefore means all 32 bytes are 0x20 or below.
//
//go:noescape
func onlySpaces32([]uint8) uint32

// onlySpacesUntil32 reports whether the lowest n bits of the mask returned by
// onlySpaces32(slice) are all zero, i.e. whether none of the first n bytes was
// flagged by onlySpaces32.
func onlySpacesUntil32(slice []uint8, n int) bool {
	flagged := onlySpaces32(slice)
	// Shifting left by 32-n drops the bits for positions n..31.
	dropped := 32 - n
	return flagged<<dropped == 0
}
31 changes: 29 additions & 2 deletions sse_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@

DATA ·oab<>+0(SB)/8, $0x3C3C3C3C3C3C3C3C
DATA ·oab<>+8(SB)/8, $0x3C3C3C3C3C3C3C3C
GLOBL ·oab<>(SB), NOPTR+RODATA, $16
DATA ·oab<>+16(SB)/8, $0x3C3C3C3C3C3C3C3C
DATA ·oab<>+24(SB)/8, $0x3C3C3C3C3C3C3C3C
GLOBL ·oab<>(SB), NOPTR+RODATA, $32

DATA ·spc<>+0(SB)/8, $0x2020202020202020
DATA ·spc<>+8(SB)/8, $0x2020202020202020
GLOBL ·spc<>(SB), NOPTR+RODATA, $16
DATA ·spc<>+16(SB)/8, $0x2020202020202020
DATA ·spc<>+24(SB)/8, $0x2020202020202020
GLOBL ·spc<>(SB), NOPTR+RODATA, $32

TEXT ·openAngleBracket16(SB),NOSPLIT, $0
MOVQ arg+0(FP), DI
Expand All @@ -17,6 +21,16 @@ TEXT ·openAngleBracket16(SB),NOSPLIT, $0
MOVW AX, ret+24(FP)
RET

// openAngleBracket32 returns the index of the first '<' (0x3C) among the 32
// bytes at the slice's data pointer, or 32 when there is none. Only the data
// pointer of the slice argument is read; its length is not checked.
TEXT ·openAngleBracket32(SB),NOSPLIT, $0
MOVQ arg+0(FP), DI // DI = slice data pointer
VMOVDQU (DI), Y0 // unaligned load of 32 input bytes
VPCMPEQB ·oab<>(SB), Y0, Y0 // 0xFF in every lane equal to '<', 0x00 elsewhere
VPMOVMSKB Y0, AX // one bit per byte lane
TZCNTL AX, AX // index of lowest set bit; 32 when the mask is zero
MOVQ AX, ret+24(FP)
VZEROUPPER // <- https://i.stack.imgur.com/dGpbi.png (presumably to avoid AVX/SSE transition penalties)
RET

TEXT ·onlySpaces16(SB),NOSPLIT, $0
MOVQ arg+0(FP), DI
MOVOU (DI), X0
Expand All @@ -28,3 +42,16 @@ TEXT ·onlySpaces16(SB),NOSPLIT, $0
PMOVMSKB X0, AX
MOVW AX, ret+24(FP)
RET

// onlySpaces32 returns a 32-bit mask for the 32 bytes at the slice's data
// pointer: bit i is set when byte i is greater than 0x20 as a signed value or
// is negative as a signed value (high bit set). Only the data pointer of the
// slice argument is read; its length is not checked.
TEXT ·onlySpaces32(SB),NOSPLIT, $0
MOVQ arg+0(FP), DI // DI = slice data pointer
VMOVDQU (DI), Y0 // unaligned load of 32 input bytes
VMOVDQA Y0, Y1 // keep a copy for the second comparison
VPCMPGTB ·spc<>(SB), Y0, Y0 // signed compare: lanes where byte > 0x20
VPXOR Y2, Y2, Y2 // Y2 = 0
VPCMPGTB Y1, Y2, Y2 // signed compare: lanes where 0 > byte (high bit set)
VPOR Y2, Y0, Y0 // merge both conditions
VPMOVMSKB Y0, AX // one bit per byte lane
MOVL AX, ret+24(FP)
VZEROUPPER // <- https://i.stack.imgur.com/dGpbi.png (presumably to avoid AVX/SSE transition penalties)
RET
10 changes: 10 additions & 0 deletions sse_amd64_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,18 @@ func TestOnlySpaces16(t *testing.T) {
assert.Equal(t, uint16(0x18), onlySpaces16([]uint8{0x20, 0x20, 0x20, 0xC2, 0xA7, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20}))
}

// TestOnlySpaces32 checks that onlySpaces32 flags exactly the two non-space
// bytes at indices 3 and 4 of an otherwise all-space 32-byte input.
func TestOnlySpaces32(t *testing.T) {
	input := []uint8{
		0x20, 0x20, 0x20, 0xC2, 0xA7, 0x20, 0x20, 0x20,
		0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
		0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
		0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	}
	want := uint32(0x18)
	assert.Equal(t, want, onlySpaces32(input))
}

// TestOnlySpacesUntil16 checks the prefix test around the first non-space
// byte, which sits at index 3 of the 16-byte input.
func TestOnlySpacesUntil16(t *testing.T) {
	input := []uint8{
		0x20, 0x20, 0x20, 0xC2, 0xA7, 0x20, 0x20, 0x20,
		0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	}
	assert.True(t, onlySpacesUntil16(input, 3))
	assert.True(t, onlySpacesUntil16(input, 2))
	assert.False(t, onlySpacesUntil16(input, 4))
}

// TestOnlySpacesUntil32 checks the prefix test around the first non-space
// byte, which sits at index 3 of the 32-byte input.
func TestOnlySpacesUntil32(t *testing.T) {
	input := []uint8{
		0x20, 0x20, 0x20, 0xC2, 0xA7, 0x20, 0x20, 0x20,
		0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
		0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
		0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
	}
	assert.True(t, onlySpacesUntil32(input, 3))
	assert.True(t, onlySpacesUntil32(input, 2))
	assert.False(t, onlySpacesUntil32(input, 4))
}

0 comments on commit 4b6ddef

Please sign in to comment.