Skip to content

Commit

Permalink
Split architecture-specific code into separate files
Browse files Browse the repository at this point in the history
These files are conditionally compiled by the Go compiler
based on the target architecture, which it selects from their filename suffix (`_amd64.go`, `_arm64.go`).
  • Loading branch information
kaiburjack committed Aug 18, 2022
1 parent 6ac37ac commit 4da9b24
Show file tree
Hide file tree
Showing 3 changed files with 165 additions and 156 deletions.
157 changes: 1 addition & 156 deletions decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,9 @@ import (
"bytes"
"errors"
"fmt"
"github.com/klauspost/cpuid/v2"
"io"
)

// CPU capability flags, evaluated once during package initialization.
var (
	// canUseSSE is true when the CPU reports both SSE2 and BMI1.
	canUseSSE = cpuid.CPU.Has(cpuid.SSE2) && cpuid.CPU.Has(cpuid.BMI1)
	// canUseAVX2 is true when canUseSSE holds and the CPU also reports AVX2.
	canUseAVX2 = canUseSSE && cpuid.CPU.Has(cpuid.AVX2)
	// simdWidth is assigned by init: 32 with AVX2, 16 with SSE only, 0 otherwise.
	simdWidth int
)

// init derives simdWidth from the detected CPU capabilities.
func init() {
	switch {
	case canUseAVX2:
		simdWidth = 32
	case canUseSSE:
		simdWidth = 16
	default:
		simdWidth = 0
	}
}

// Decoder decodes an XML input stream into Token values.
type Decoder interface {
// NextToken decodes and stores the next Token into
Expand Down Expand Up @@ -55,6 +40,7 @@ var (
bsxml = []byte("xml")
bsspace = []byte("space")
bspreserve = []byte("preserve")
simdWidth int
)

// NewDecoder creates a new Decoder.
Expand Down Expand Up @@ -125,72 +111,6 @@ func (thiz *decoder) Reset(r io.Reader) {
thiz.lastStartElement = false
}

// skipWhitespaces dispatches to the whitespace-skipping implementation that
// matches the detected CPU capabilities: AVX2 first, then SSE, and finally
// the generic fallback.
func (thiz *decoder) skipWhitespaces(b byte) (byte, error) {
	switch {
	case canUseAVX2:
		return thiz.skipWhitespacesAVX2(b)
	case canUseSSE:
		return thiz.skipWhitespacesSSE(b)
	default:
		return thiz.skipWhitespacesGeneric(b)
	}
}

// skipWhitespacesAVX2 returns b unchanged when it is not whitespace.
// Otherwise it consumes buffered input in steps reported by onlySpaces32
// and returns the byte read right after the first step whose result is
// not 32. When the buffer is exhausted it is discarded and refilled via
// read0. Any error from discard, readByte or read0 is returned with a
// zero byte.
//
// NOTE(review): onlySpaces32 is defined elsewhere; this code treats a result
// of 32 as "no non-whitespace byte found in this step" — confirm.
func (thiz *decoder) skipWhitespacesAVX2(b byte) (byte, error) {
	if !isWhitespace(b) {
		return b, nil
	}
	for {
		for thiz.r < thiz.w {
			spaces := int(onlySpaces32(thiz.rb[thiz.r:thiz.w]))
			if _, err := thiz.discard(spaces); err != nil {
				return 0, err
			}
			if spaces == 32 {
				// Whole step was consumed; keep scanning the buffer.
				continue
			}
			next, err := thiz.readByte()
			if err != nil {
				return 0, err
			}
			return next, nil
		}
		thiz.discardBuffer()
		if err := thiz.read0(); err != nil {
			return 0, err
		}
	}
}

// skipWhitespacesSSE returns b unchanged when it is not whitespace.
// Otherwise it scans the buffered input in 16-byte steps using
// onlySpaces16, accumulating the number of bytes to skip in c. As soon as a
// step reports a value other than 16, the accumulated bytes are discarded
// and the next byte is read and returned. When the buffer is exhausted it
// is discarded and refilled via read0. Any error from discard, readByte or
// read0 is returned with a zero byte.
//
// NOTE(review): onlySpaces16 is defined elsewhere; this code treats a result
// of 16 as "no non-whitespace byte found in this step" — confirm.
func (thiz *decoder) skipWhitespacesSSE(b byte) (byte, error) {
	if !isWhitespace(b) {
		return b, nil
	}
	for {
		j := thiz.r
		c := 0
		for thiz.w > thiz.r+c {
			// Use the 16-byte helper so the result matches the "!= 16"
			// check below. This path is only selected when canUseAVX2 is
			// false, and the previously used onlySpaces32 could report 32,
			// which this loop would misread as "non-whitespace found".
			sidx := onlySpaces16(thiz.rb[j+c : thiz.w])
			c += int(sidx)
			if sidx != 16 {
				_, err := thiz.discard(c)
				if err != nil {
					return 0, err
				}
				newB, err := thiz.readByte()
				if err != nil {
					return 0, err
				}
				return newB, nil
			}
		}
		thiz.discardBuffer()
		err := thiz.read0()
		if err != nil {
			return 0, err
		}
	}
}

func (thiz *decoder) skipWhitespacesGeneric(b byte) (byte, error) {
for {
if !isWhitespace(b) {
Expand Down Expand Up @@ -412,81 +332,6 @@ func (thiz *decoder) decodeStartElement(t *Token) error {
return nil
}

// decodeTextSSE scans buffered input in 16-byte steps using
// openAngleBracket16 until a step reports a value other than 16, copying
// the scanned bytes into thiz.bb.
//
// It returns (true, nil) without touching t when every scanned step was
// whitespace-only (onlySpaces16 >= openAngleBracket16 for each step) and
// preserveWhitespaces is false for the current top element. Otherwise it
// sets t.Kind to TokenTypeTextElement and t.ByteData to the bytes appended
// to thiz.bb during this call, and returns (false, nil). Errors from
// discard or read0 are returned with false.
//
// NOTE(review): openAngleBracket16 and onlySpaces16 are defined elsewhere;
// a result of 16 is treated here as "nothing found in this step" — confirm.
func (thiz *decoder) decodeTextSSE(t *Token) (bool, error) {
	textStart := len(thiz.bb)
	allSpace := true
	for {
		begin, scanned := thiz.r, 0
		for thiz.r+scanned < thiz.w {
			window := thiz.rb[begin+scanned : thiz.w]
			lt := openAngleBracket16(window)
			// Only probe for whitespace while everything so far was whitespace.
			if allSpace {
				allSpace = onlySpaces16(window) >= lt
			}
			scanned += int(lt)
			if lt == 16 {
				continue
			}
			if _, err := thiz.discard(scanned); err != nil {
				return false, err
			}
			if allSpace && !thiz.preserveWhitespaces[thiz.top] {
				return true, nil
			}
			thiz.bb = append(thiz.bb, thiz.rb[begin:begin+scanned]...)
			t.Kind = TokenTypeTextElement
			t.ByteData = thiz.bb[textStart:]
			return false, nil
		}
		// Buffer exhausted: keep what was scanned, then refill.
		thiz.bb = append(thiz.bb, thiz.rb[begin:thiz.w]...)
		thiz.discardBuffer()
		if err := thiz.read0(); err != nil {
			return false, err
		}
	}
}

// decodeTextAVX2 scans buffered input in 32-byte steps using
// openAngleBracket32 until a step reports a value other than 32, copying
// the scanned bytes into thiz.bb.
//
// It returns (true, nil) without touching t when every scanned step was
// whitespace-only (onlySpaces32 >= openAngleBracket32 for each step) and
// preserveWhitespaces is false for the current top element. Otherwise it
// sets t.Kind to TokenTypeTextElement and t.ByteData to the bytes appended
// to thiz.bb during this call, and returns (false, nil). Errors from
// discard or read0 are returned with false.
//
// NOTE(review): openAngleBracket32 and onlySpaces32 are defined elsewhere;
// a result of 32 is treated here as "nothing found in this step" — confirm.
func (thiz *decoder) decodeTextAVX2(t *Token) (bool, error) {
	textStart := len(thiz.bb)
	allSpace := true
	for {
		begin, scanned := thiz.r, 0
		for thiz.r+scanned < thiz.w {
			window := thiz.rb[begin+scanned : thiz.w]
			lt := openAngleBracket32(window)
			// Only probe for whitespace while everything so far was whitespace.
			if allSpace {
				allSpace = onlySpaces32(window) >= lt
			}
			scanned += int(lt)
			if lt == 32 {
				continue
			}
			if _, err := thiz.discard(scanned); err != nil {
				return false, err
			}
			if allSpace && !thiz.preserveWhitespaces[thiz.top] {
				return true, nil
			}
			thiz.bb = append(thiz.bb, thiz.rb[begin:begin+scanned]...)
			t.Kind = TokenTypeTextElement
			t.ByteData = thiz.bb[textStart:]
			return false, nil
		}
		// Buffer exhausted: keep what was scanned, then refill.
		thiz.bb = append(thiz.bb, thiz.rb[begin:thiz.w]...)
		thiz.discardBuffer()
		if err := thiz.read0(); err != nil {
			return false, err
		}
	}
}

// decodeText dispatches to the text-decoding implementation that matches
// the detected CPU capabilities: AVX2 first, then SSE, and finally the
// generic fallback.
func (thiz *decoder) decodeText(t *Token) (bool, error) {
	switch {
	case canUseAVX2:
		return thiz.decodeTextAVX2(t)
	case canUseSSE:
		return thiz.decodeTextSSE(t)
	default:
		return thiz.decodeTextGeneric(t)
	}
}

func (thiz *decoder) decodeTextGeneric(t *Token) (bool, error) {
i := len(thiz.bb)
onlyWhitespaces := true
Expand Down
155 changes: 155 additions & 0 deletions decoder_amd64.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
package gosaxml

import "github.com/klauspost/cpuid/v2"

// CPU capability flags, evaluated once during package initialization.
var (
	// canUseSSE is true when the CPU reports both SSE2 and BMI1.
	canUseSSE = cpuid.CPU.Has(cpuid.SSE2) && cpuid.CPU.Has(cpuid.BMI1)
	// canUseAVX2 is true when canUseSSE holds and the CPU also reports AVX2.
	canUseAVX2 = canUseSSE && cpuid.CPU.Has(cpuid.AVX2)
)

// init sets simdWidth to 32 when AVX2 is usable and to 16 when only SSE is
// usable; otherwise simdWidth is left at its zero value.
func init() {
	switch {
	case canUseAVX2:
		simdWidth = 32
	case canUseSSE:
		simdWidth = 16
	}
}

// skipWhitespaces dispatches to the whitespace-skipping implementation that
// matches the detected CPU capabilities: AVX2 first, then SSE, and finally
// the generic fallback.
func (thiz *decoder) skipWhitespaces(b byte) (byte, error) {
	switch {
	case canUseAVX2:
		return thiz.skipWhitespacesAVX2(b)
	case canUseSSE:
		return thiz.skipWhitespacesSSE(b)
	default:
		return thiz.skipWhitespacesGeneric(b)
	}
}

// skipWhitespacesAVX2 returns b unchanged when it is not whitespace.
// Otherwise it consumes buffered input in steps reported by onlySpaces32
// and returns the byte read right after the first step whose result is
// not 32. When the buffer is exhausted it is discarded and refilled via
// read0. Any error from discard, readByte or read0 is returned with a
// zero byte.
//
// NOTE(review): onlySpaces32 is defined elsewhere; this code treats a result
// of 32 as "no non-whitespace byte found in this step" — confirm.
func (thiz *decoder) skipWhitespacesAVX2(b byte) (byte, error) {
	if !isWhitespace(b) {
		return b, nil
	}
	for {
		for thiz.r < thiz.w {
			spaces := int(onlySpaces32(thiz.rb[thiz.r:thiz.w]))
			if _, err := thiz.discard(spaces); err != nil {
				return 0, err
			}
			if spaces == 32 {
				// Whole step was consumed; keep scanning the buffer.
				continue
			}
			next, err := thiz.readByte()
			if err != nil {
				return 0, err
			}
			return next, nil
		}
		thiz.discardBuffer()
		if err := thiz.read0(); err != nil {
			return 0, err
		}
	}
}

// skipWhitespacesSSE returns b unchanged when it is not whitespace.
// Otherwise it scans the buffered input in 16-byte steps using
// onlySpaces16, accumulating the number of bytes to skip in c. As soon as a
// step reports a value other than 16, the accumulated bytes are discarded
// and the next byte is read and returned. When the buffer is exhausted it
// is discarded and refilled via read0. Any error from discard, readByte or
// read0 is returned with a zero byte.
//
// NOTE(review): onlySpaces16 is defined elsewhere; this code treats a result
// of 16 as "no non-whitespace byte found in this step" — confirm.
func (thiz *decoder) skipWhitespacesSSE(b byte) (byte, error) {
	if !isWhitespace(b) {
		return b, nil
	}
	for {
		j := thiz.r
		c := 0
		for thiz.w > thiz.r+c {
			// Use the 16-byte helper so the result matches the "!= 16"
			// check below. This path is only selected when canUseAVX2 is
			// false, and the previously used onlySpaces32 could report 32,
			// which this loop would misread as "non-whitespace found".
			sidx := onlySpaces16(thiz.rb[j+c : thiz.w])
			c += int(sidx)
			if sidx != 16 {
				_, err := thiz.discard(c)
				if err != nil {
					return 0, err
				}
				newB, err := thiz.readByte()
				if err != nil {
					return 0, err
				}
				return newB, nil
			}
		}
		thiz.discardBuffer()
		err := thiz.read0()
		if err != nil {
			return 0, err
		}
	}
}

// decodeText dispatches to the text-decoding implementation that matches
// the detected CPU capabilities: AVX2 first, then SSE, and finally the
// generic fallback.
func (thiz *decoder) decodeText(t *Token) (bool, error) {
	switch {
	case canUseAVX2:
		return thiz.decodeTextAVX2(t)
	case canUseSSE:
		return thiz.decodeTextSSE(t)
	default:
		return thiz.decodeTextGeneric(t)
	}
}

// decodeTextSSE scans buffered input in 16-byte steps using
// openAngleBracket16 until a step reports a value other than 16, copying
// the scanned bytes into thiz.bb.
//
// It returns (true, nil) without touching t when every scanned step was
// whitespace-only (onlySpaces16 >= openAngleBracket16 for each step) and
// preserveWhitespaces is false for the current top element. Otherwise it
// sets t.Kind to TokenTypeTextElement and t.ByteData to the bytes appended
// to thiz.bb during this call, and returns (false, nil). Errors from
// discard or read0 are returned with false.
//
// NOTE(review): openAngleBracket16 and onlySpaces16 are defined elsewhere;
// a result of 16 is treated here as "nothing found in this step" — confirm.
func (thiz *decoder) decodeTextSSE(t *Token) (bool, error) {
	textStart := len(thiz.bb)
	allSpace := true
	for {
		begin, scanned := thiz.r, 0
		for thiz.r+scanned < thiz.w {
			window := thiz.rb[begin+scanned : thiz.w]
			lt := openAngleBracket16(window)
			// Only probe for whitespace while everything so far was whitespace.
			if allSpace {
				allSpace = onlySpaces16(window) >= lt
			}
			scanned += int(lt)
			if lt == 16 {
				continue
			}
			if _, err := thiz.discard(scanned); err != nil {
				return false, err
			}
			if allSpace && !thiz.preserveWhitespaces[thiz.top] {
				return true, nil
			}
			thiz.bb = append(thiz.bb, thiz.rb[begin:begin+scanned]...)
			t.Kind = TokenTypeTextElement
			t.ByteData = thiz.bb[textStart:]
			return false, nil
		}
		// Buffer exhausted: keep what was scanned, then refill.
		thiz.bb = append(thiz.bb, thiz.rb[begin:thiz.w]...)
		thiz.discardBuffer()
		if err := thiz.read0(); err != nil {
			return false, err
		}
	}
}

// decodeTextAVX2 scans buffered input in 32-byte steps using
// openAngleBracket32 until a step reports a value other than 32, copying
// the scanned bytes into thiz.bb.
//
// It returns (true, nil) without touching t when every scanned step was
// whitespace-only (onlySpaces32 >= openAngleBracket32 for each step) and
// preserveWhitespaces is false for the current top element. Otherwise it
// sets t.Kind to TokenTypeTextElement and t.ByteData to the bytes appended
// to thiz.bb during this call, and returns (false, nil). Errors from
// discard or read0 are returned with false.
//
// NOTE(review): openAngleBracket32 and onlySpaces32 are defined elsewhere;
// a result of 32 is treated here as "nothing found in this step" — confirm.
func (thiz *decoder) decodeTextAVX2(t *Token) (bool, error) {
	textStart := len(thiz.bb)
	allSpace := true
	for {
		begin, scanned := thiz.r, 0
		for thiz.r+scanned < thiz.w {
			window := thiz.rb[begin+scanned : thiz.w]
			lt := openAngleBracket32(window)
			// Only probe for whitespace while everything so far was whitespace.
			if allSpace {
				allSpace = onlySpaces32(window) >= lt
			}
			scanned += int(lt)
			if lt == 32 {
				continue
			}
			if _, err := thiz.discard(scanned); err != nil {
				return false, err
			}
			if allSpace && !thiz.preserveWhitespaces[thiz.top] {
				return true, nil
			}
			thiz.bb = append(thiz.bb, thiz.rb[begin:begin+scanned]...)
			t.Kind = TokenTypeTextElement
			t.ByteData = thiz.bb[textStart:]
			return false, nil
		}
		// Buffer exhausted: keep what was scanned, then refill.
		thiz.bb = append(thiz.bb, thiz.rb[begin:thiz.w]...)
		thiz.discardBuffer()
		if err := thiz.read0(); err != nil {
			return false, err
		}
	}
}
9 changes: 9 additions & 0 deletions decoder_arm64.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package gosaxml

// skipWhitespaces is the arm64 variant; it always delegates to the generic
// implementation.
//
// NOTE(review): only amd64 and arm64 variants of this method are visible in
// this change; other GOARCH targets may lack a definition — confirm.
func (thiz *decoder) skipWhitespaces(b byte) (byte, error) {
	next, err := thiz.skipWhitespacesGeneric(b)
	return next, err
}

// decodeText is the arm64 variant; it always delegates to the generic
// implementation.
func (thiz *decoder) decodeText(t *Token) (bool, error) {
	skipped, err := thiz.decodeTextGeneric(t)
	return skipped, err
}

0 comments on commit 4da9b24

Please sign in to comment.