Skip to content

Commit

Permalink
tools/internal: wrap use of collators in mutexes
Browse files Browse the repository at this point in the history
Despite being a process of reading from read-only state, somehow these
APIs turn out to be thread-unsafe, and parsing multiple PSLs concurrently
in the same process results in data races and panics.

Thankfully each use of the collator is quite short, so wrapping their use
in a global mutex, while unfortunate, is acceptable. And enabling parallel
parsing of PSLs speeds up online validations by a factor of 10x on many cored
machines.
  • Loading branch information
danderson committed Sep 18, 2024
1 parent e5a1782 commit 70bca00
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 4 deletions.
22 changes: 18 additions & 4 deletions tools/internal/domain/domain.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"fmt"
"slices"
"strings"
"sync"

"golang.org/x/net/idna"
"golang.org/x/text/collate"
Expand Down Expand Up @@ -203,10 +204,7 @@ func (l Label) Compare(m Label) int {
// If two labels aren't equal, we are free to order them however
// we want. We choose to order them with the English Unicode
// collation.
var buf collate.Buffer
kl := labelCollator.KeyFromString(&buf, l.label)
km := labelCollator.KeyFromString(&buf, m.label)
if res := bytes.Compare(kl, km); res != 0 {
if res := compareLabel(l, m); res != 0 {
return res
}

Expand Down Expand Up @@ -297,4 +295,20 @@ var domainValidator = idna.New(
// byte compare. However, this option is buggy and silently ignored in
// some cases (https://github.com/golang/go/issues/68379), so we do
// this tie breaking ourselves in Label.Compare.
var labelCollatorMu sync.Mutex
var labelCollator = collate.New(language.English)

func compareLabel(a, b Label) int {
// Somehow, despite this collation process being almost entirely
// reading from constant read-only tables compiled into the
// binary, x/text/collate's KeyFromString is not safe to use
// concurrently. Wrap its use in a mutex so that multiple
// concurrent parse operations serialize their collation key
// generation.
labelCollatorMu.Lock()
defer labelCollatorMu.Unlock()
var buf collate.Buffer
kl := labelCollator.KeyFromString(&buf, a.label)
km := labelCollator.KeyFromString(&buf, b.label)
return bytes.Compare(kl, km)
}
11 changes: 11 additions & 0 deletions tools/internal/parser/unicode.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package parser

import (
"bytes"
"sync"

"golang.org/x/text/collate"
"golang.org/x/text/language"
Expand Down Expand Up @@ -67,6 +68,15 @@ func compareCommentText(a string, b string) int {
// corresponding "sort keys", and then bytes.Compare those. There
// are more exhaustive tests for sort key computation, so there is
// higher confidence that it works correctly.
//
// Also, somehow, despite this collation process being almost
// entirely reading from constant read-only tables compiled into
// the binary, x/text/collate's KeyFromString is not safe to use
// concurrently. Wrap its use in a mutex so that multiple
// concurrent parse operations serialize their collation key
// generation.
commentCollatorMu.Lock()
defer commentCollatorMu.Unlock()
var buf collate.Buffer
ka := commentCollator.KeyFromString(&buf, a)
kb := commentCollator.KeyFromString(&buf, b)
Expand All @@ -77,3 +87,4 @@ func compareCommentText(a string, b string) int {
// non-suffix text. See the comment at the start of this file for more
// details.
var commentCollator = collate.New(language.MustParse("en"))
var commentCollatorMu sync.Mutex

0 comments on commit 70bca00

Please sign in to comment.