Skip to content

Commit

Permalink
x/text/internal/colltab: Improve numeric sorting
Browse files Browse the repository at this point in the history
Fixes golang/go#25554
Sorts zero (0) as the first number instead of the last
Sorts numbers with leading zeros after numbers with less leading zeros
  • Loading branch information
lordwelch committed May 5, 2024
1 parent 8d533a0 commit a905441
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 22 deletions.
10 changes: 9 additions & 1 deletion collate/collate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,15 @@ func TestNumeric(t *testing.T) {
{"A-1", "A-2", -1},
{"A-2", "A-12", -1},
{"A-12", "A-2", 1},
{"A-0001", "A-1", 0},
{"A-0001", "A-1", 1},
{"A-0000", "A-1", -1},
{"0000-", "1-", -1},
{"00001", "1", 1},
{"00", "01", -1},
{"0", "00", -1},
{"01", "001", -1},
{"01", "1", 1},
{"1", "01", -1},
} {
if got := c.CompareString(tt.a, tt.b); got != tt.want {
t.Errorf("%d: CompareString(%s, %s) = %d; want %d", i, tt.a, tt.b, got, tt.want)
Expand Down
21 changes: 19 additions & 2 deletions internal/colltab/numeric.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,20 @@ func (nw *numericWeighter) AppendNext(buf []Elem, s []byte) (ce []Elem, n int) {
}
// ce might have been grown already, so take it instead of buf.
nc.init(ce, len(buf), isZero)
old_index := len(nc.elems)
for n < len(s) {
ce, sz := nw.Weighter.AppendNext(nc.elems, s[n:])
nc.b = s
n += sz
if !nc.update(ce) {
break
}
old_index = len(nc.elems)
}
nc.elems = append(nc.elems, 0)
copy(nc.elems[old_index+1:], nc.elems[old_index:])
nc.elems[old_index], _ = MakeElem(nc.zero+1, defaultSecondary, defaultTertiary, 0)

return nc.result(), n
}

Expand All @@ -105,14 +111,20 @@ func (nw *numericWeighter) AppendNextString(buf []Elem, s string) (ce []Elem, n
return ce, n
}
nc.init(ce, len(buf), isZero)
old_index := len(nc.elems)
for n < len(s) {
ce, sz := nw.Weighter.AppendNextString(nc.elems, s[n:])
nc.s = s
n += sz
if !nc.update(ce) {
break
}
old_index = len(nc.elems)
}
nc.elems = append(nc.elems, 0)
copy(nc.elems[old_index+1:], nc.elems[old_index:])
nc.elems[old_index], _ = MakeElem(nc.zero+1, defaultSecondary, defaultTertiary, 0)

return nc.result(), n
}

Expand All @@ -122,6 +134,7 @@ type numberConverter struct {
elems []Elem
nDigits int
lenIndex int
zero int

s string // set if the input was of type string
b []byte // set if the input was of type []byte
Expand All @@ -133,6 +146,7 @@ func (nc *numberConverter) init(elems []Elem, oldLen int, isZero bool) {
// Insert a marker indicating the start of a number and a placeholder
// for the number of digits.
if isZero {
nc.zero++
elems = append(elems[:oldLen], nc.w.numberStart, 0)
} else {
elems = append(elems, 0, 0)
Expand Down Expand Up @@ -217,20 +231,23 @@ const maxDigits = 1<<maxPrimaryBits - 1
func (nc *numberConverter) update(elems []Elem) bool {
isZero, ok := nc.checkNextDigit(elems)
if nc.nDigits == 0 && isZero {
if nc.zero+1 < maxDigits {
nc.zero++
}
return true
}
nc.elems = elems
if !ok {
return false
}
nc.nDigits++
return nc.nDigits < maxDigits
return nc.nDigits+1 < maxDigits
}

// result fills in the length element for the digit sequence and returns the
// completed collation elements.
func (nc *numberConverter) result() []Elem {
e, _ := MakeElem(nc.nDigits, defaultSecondary, defaultTertiary, 0)
e, _ := MakeElem(nc.nDigits+1, defaultSecondary, defaultTertiary, 0)
nc.elems[nc.lenIndex] = e
return nc.elems
}
59 changes: 40 additions & 19 deletions internal/colltab/numeric_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,34 +78,39 @@ func TestNumericAppendNext(t *testing.T) {
{"a", p(5)},
{"klm", p(99)},
{"aa", p(5, 5)},
{"1", p(120, 1, 101)},
{"0", p(120, 0)},
{"01", p(120, 1, 101)},
{"0001", p(120, 1, 101)},
{"10", p(120, 2, 101, 100)},
{"99", p(120, 2, 119, 119)},
{"9999", p(120, 4, 119, 119, 119, 119)},
{"1a", p(120, 1, 101, 5)},
{"0b", p(120, 0, 6)},
{"01c", p(120, 1, 101, 8, 2)},
{"10x", p(120, 2, 101, 100, 200)},
{"99y", p(120, 2, 119, 119, 201)},
{"9999nop", p(120, 4, 119, 119, 119, 119, 121)},
{"1", p(120, 2, 101, 1)},
{"0", p(120, 1, 2)},
{"00", p(120, 1, 3)},
{"01", p(120, 2, 101, 2)},
{"02", p(120, 2, 102, 2)},
{"0001", p(120, 2, 101, 4)},
{"10", p(120, 3, 101, 100, 1)},
{"99", p(120, 3, 119, 119, 1)},
{"9999", p(120, 5, 119, 119, 119, 119, 1)},
{"1a", p(120, 2, 101, 1, 5)},
{"0b", p(120, 1, 2, 6)},
{"01c", p(120, 2, 101, 2, 8, 2)},
{"10x", p(120, 3, 101, 100, 1, 200)},
{"99y", p(120, 3, 119, 119, 1, 201)},
{"9999nop", p(120, 5, 119, 119, 119, 119, 1, 121)},
{"a0001", p(5, 120, 2, 101, 4)},
{"a1", p(5, 120, 2, 101, 1)},

// Allow follow-up collation elements if they have a zero non-primary.
{"١٢٩", []Elem{e(120), e(3), e(101), tPlus3, e(102), tPlus3, e(119), tPlus3}},
{"١٢٩", []Elem{e(120), e(4), e(101), tPlus3, e(102), tPlus3, e(119), tPlus3, e(1)}},
{
"129",
[]Elem{
e(120), e(3),
e(120), e(4),
e(101, digSec, digTert+1),
e(102, digSec, digTert+3),
e(119, digSec, digTert+1),
e(1),
},
},

// Ensure AppendNext* adds to the given buffer.
{"a10", p(5, 120, 2, 101, 100)},
{"a10", p(5, 120, 3, 101, 100, 1)},
} {
nw := NewNumericWeighter(numWeighter)

Expand Down Expand Up @@ -137,12 +142,28 @@ func TestNumericOverflow(t *testing.T) {

got, n := nw.AppendNextString(nil, manyDigits)

if n != maxDigits {
t.Errorf("n: got %d; want %d", n, maxDigits)
if n != maxDigits-1 { // A digit is lost because we can't use elem.Primary() == 0 as it sorts after elem.Primary() == 1
t.Errorf("n: got %d; want %d", n, maxDigits-1)
}

if got[1].Primary() != maxDigits {
t.Errorf("primary(e[1]): got %d; want %d", n, maxDigits)
t.Errorf("primary(e[1]): got %d; want %d", got[1].Primary(), maxDigits)
}
}

func TestNumericZeroOverflow(t *testing.T) {
manyDigits := strings.Repeat("0", maxDigits+1) + "a"

nw := NewNumericWeighter(numWeighter)

got, n := nw.AppendNextString(nil, manyDigits)

if n != maxDigits+2 { // Zeros after maxDigits-1 are ignored but we still consume them so that the number with leading zeros is ordered after a number with less leading zeros
t.Errorf("n: got %d; want %d", n, maxDigits+2)
}

if got[2].Primary() != maxDigits {
t.Errorf("primary(e[2]): got %d; want %d", got[1].Primary(), maxDigits)
}
}

Expand Down

0 comments on commit a905441

Please sign in to comment.