forked from creachadair/mds
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmstr.go
105 lines (94 loc) · 3.1 KB
/
mstr.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
// Package mstr defines utility functions for strings.
package mstr
import (
"cmp"
"strings"
)
// Trunc returns a prefix of s having length no greater than n bytes. If s
// exceeds this length, it is truncated at the greatest point ≤ n at which the
// result does not end in a partial UTF-8 encoding. A multi-byte encoding that
// fits entirely within the first n bytes is kept. If n ≤ 0, Trunc returns "".
//
// Trunc does not verify that s is valid UTF-8, but if it is the result will
// remain valid after truncation.
func Trunc(s string, n int) string {
	if n >= len(s) {
		return s
	}
	if n <= 0 {
		return "" // also guards against slicing with a negative bound
	}

	// Here 0 < n < len(s), so s[n] is the first byte that would be cut off.
	// If that byte is a continuation byte (0b10...), the cut falls inside a
	// multi-byte encoding; back up until the cut lands just before the byte
	// that begins that encoding. If s[n] is not a continuation byte, whatever
	// precedes it is already complete and nothing needs to be removed.
	for n > 0 && s[n]&0xc0 == 0x80 {
		n--
	}
	return s[:n]
}
// Lines breaks s into its newline-separated lines. It behaves like
// [strings.Split] with a separator of "\n", with two differences: an empty
// input yields a nil slice rather than a slice holding one empty string, and a
// single trailing newline is taken as the terminator of the last line rather
// than as the start of an additional empty line.
func Lines(s string) []string {
	var lines []string
	if len(s) > 0 {
		// Remove at most one terminating newline before splitting.
		text := strings.TrimSuffix(s, "\n")
		lines = strings.Split(text, "\n")
	}
	return lines
}
// CompareNatural compares its arguments lexicographically, but treats runs of
// decimal digits as the spellings of natural numbers and compares their values
// instead of the individual digits. Digit runs may be arbitrarily long; they
// are compared without being converted to a fixed-width integer, so there is
// no overflow. Runs that differ only in leading zeros denote the same value
// and are treated as equal at that position.
//
// For example, "a2b" is after "a12b" under ordinary lexicographic comparison,
// but before under CompareNatural, because 2 < 12. However, if one argument
// has digits and the other has non-digits at that position (see for example
// "a" vs. "12") the comparison falls back to lexicographic.
//
// CompareNatural returns -1 if a < b, 0 if a == b, and +1 if a > b.
func CompareNatural(a, b string) int {
	for a != "" && b != "" {
		da, ra, aok := parseDigits(a)
		db, rb, bok := parseDigits(b)
		switch {
		case aok && bok:
			// Both begin with runs of digits, compare them numerically.
			if c := compareDigits(da, db); c != 0 {
				return c
			}
		case aok != bok:
			// One begins with digits, the other does not.
			// They cannot be equal, so compare them lexicographically.
			return cmp.Compare(a, b)
		default:
			// Neither begins with digits. Compare runs of non-digits.
			var pa, pb string
			pa, ra = parseStr(a)
			pb, rb = parseStr(b)
			if c := cmp.Compare(pa, pb); c != 0 {
				return c
			}
		}
		// The leading runs matched; continue with what follows them.
		a, b = ra, rb
	}
	// At least one argument is exhausted: the empty one (if any) orders first.
	return cmp.Compare(a, b)
}

// compareDigits compares two strings of decimal digits by numeric value.
// After leading zeros are removed, a longer run denotes a larger number, and
// runs of equal length order the same way numerically as they do bytewise.
func compareDigits(a, b string) int {
	a = strings.TrimLeft(a, "0")
	b = strings.TrimLeft(b, "0")
	if c := cmp.Compare(len(a), len(b)); c != 0 {
		return c
	}
	return cmp.Compare(a, b)
}

// parseDigits reports whether s begins with a run of one or more decimal
// digits, and if so returns that run, along with the unconsumed tail of the
// string. If s does not begin with a digit, digits is empty and rest is s.
func parseDigits(s string) (digits, rest string, ok bool) {
	var i int
	for i < len(s) && isDigit(s[i]) {
		i++
	}
	return s[:i], s[i:], i > 0
}

// parseStr returns the longest prefix of s not containing decimal digits,
// along with the remaining suffix of s.
func parseStr(s string) (pfx, sfx string) {
	var i int
	for i < len(s) && !isDigit(s[i]) {
		i++
	}
	return s[:i], s[i:]
}

// isDigit reports whether b is an ASCII decimal digit.
func isDigit(b byte) bool { return b >= '0' && b <= '9' }