Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Wrap errors happening during PRS in a new vterrors code #17669

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions go/test/endtoend/reparent/newfeaturetest/reparent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,3 +234,76 @@ func TestBufferingWithMultipleDisruptions(t *testing.T) {
// Wait for all the writes to have succeeded.
wg.Wait()
}

// func TestBufferingErrorInTransaction(t *testing.T) {
// clusterInstance := utils.SetupShardedReparentCluster(t, policy.DurabilitySemiSync)
// defer utils.TeardownCluster(clusterInstance)
//
// // err := clusterInstance.StartVtgate()
// // require.NoError(t, err)
//
// // Start by reparenting all the shards to the first tablet.
// keyspace := clusterInstance.Keyspaces[0]
// tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets
// // Confirm that the replication is setup correctly in the beginning.
// // tablets[0] is the primary tablet in the beginning.
// utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2]})
//
// // Start a transaction to the primary.
// vtParams := clusterInstance.GetVTParams(keyspace.Name)
// conn, err := mysql.Connect(context.Background(), &vtParams)
// require.NoError(t, err)
// _, err = conn.ExecuteFetch("begin", 0, false)
// require.NoError(t, err)
// idx := 20
// // _, err = conn.ExecuteFetch(utils.GetInsertMultipleValuesQuery(idx, idx+1, idx+2, idx+3), 0, false)
// _, err = conn.ExecuteFetch(utils.GetInsertQuery(idx), 0, false)
// require.NoError(t, err)
//
// ctx, cancel := context.WithCancel(context.Background())
// defer cancel()
// go func() {
// for {
// select {
// case <-ctx.Done():
// return
// default:
// idx += 1
// // _, err = conn.ExecuteFetch(utils.GetInsertMultipleValuesQuery(idx, idx+1, idx+2, idx+3), 0, false)
// _, err = conn.ExecuteFetch(utils.GetInsertQuery(idx), 0, false)
// log.Errorf("Error received - %v", err)
// if err != nil {
// if strings.Contains(err.Error(), "transaction rolled back to reverse changes") {
// log.Errorf("We have rolled back - issuing a new begin")
// _, err = conn.ExecuteFetch("begin", 0, false)
// require.NoError(t, err)
// }
// }
// time.Sleep(100 * time.Millisecond)
// }
// }
// }()
//
// // Reparent to the other replica
// utils.ShardName = "-80"
// defer func() {
// utils.ShardName = "0"
// }()
//
// output, err := utils.Prs(t, clusterInstance, tablets[1])
// require.NoError(t, err, "error in PlannedReparentShard output - %s", output)
//
// time.Sleep(1 * time.Minute)
//
// // We now restart the vttablet that became a replica.
// utils.StopTablet(t, tablets[0], false)
//
// time.Sleep(1 * time.Minute)
//
// tablets[0].VttabletProcess.ServingStatus = "SERVING"
// err = tablets[0].VttabletProcess.Setup()
// require.NoError(t, err)
//
// time.Sleep(1 * time.Minute)
// cancel()
// }
28 changes: 18 additions & 10 deletions go/test/endtoend/reparent/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,14 @@ import (
)

var (
KeyspaceName = "ks"
dbName = "vt_" + KeyspaceName
username = "vt_dba"
Hostname = "localhost"
insertVal = 1
insertSQL = "insert into vt_insert_test(id, msg) values (%d, 'test %d')"
sqlSchema = `
KeyspaceName = "ks"
dbName = "vt_" + KeyspaceName
username = "vt_dba"
Hostname = "localhost"
insertVal = 1
insertSQL = "insert into vt_insert_test(id, msg) values (%d, 'test %d')"
insertSQLMultipleValues = "insert into vt_insert_test(id, msg) values (%d, 'test %d'), (%d, 'test %d'), (%d, 'test %d'), (%d, 'test %d')"
sqlSchema = `
create table vt_insert_test (
id bigint,
msg varchar(64),
Expand Down Expand Up @@ -98,12 +99,14 @@ func SetupShardedReparentCluster(t *testing.T, durability string) *cluster.Local

// Start keyspace
keyspace := &cluster.Keyspace{
Name: KeyspaceName,
SchemaSQL: sqlSchema,
VSchema: `{"sharded": true, "vindexes": {"hash_index": {"type": "hash"}}, "tables": {"vt_insert_test": {"column_vindexes": [{"column": "id", "name": "hash_index"}]}}}`,
Name: KeyspaceName,
SchemaSQL: sqlSchema,
VSchema: `{"sharded": true, "vindexes": {"hash_index": {"type": "hash"}}, "tables": {"vt_insert_test": {"column_vindexes": [{"column": "id", "name": "hash_index"}]}}}`,
// VSchema: `{"sharded": true, "vindexes": {"hash_index": {"type": "reverse_bits"}}, "tables": {"vt_insert_test": {"column_vindexes": [{"column": "id", "name": "hash_index"}]}}}`,
DurabilityPolicy: durability,
}
err = clusterInstance.StartKeyspace(*keyspace, []string{"-40", "40-80", "80-"}, 2, false)
// err = clusterInstance.StartKeyspace(*keyspace, []string{"-80", "80-"}, 2, false)
require.NoError(t, err)

// Start Vtgate
Expand All @@ -117,6 +120,11 @@ func GetInsertQuery(idx int) string {
return fmt.Sprintf(insertSQL, idx, idx)
}

// GetInsertMultipleValuesQuery returns a built insert query to insert multiple rows at once.
func GetInsertMultipleValuesQuery(idx1, idx2, idx3, idx4 int) string {
return fmt.Sprintf(insertSQLMultipleValues, idx1, idx1, idx2, idx2, idx3, idx3, idx4, idx4)
}

// GetSelectionQuery returns a built selection query read the data.
func GetSelectionQuery() string {
return `select * from vt_insert_test`
Expand Down
2 changes: 1 addition & 1 deletion go/vt/discovery/healthcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -872,7 +872,7 @@ func (hc *HealthCheckImpl) TabletConnection(ctx context.Context, alias *topodata
hc.mu.Unlock()
if thc == nil || thc.Conn == nil {
// TODO: test that throws this error
return nil, vterrors.Errorf(vtrpc.Code_NOT_FOUND, "tablet: %v is either down or nonexistent", alias)
return nil, vterrors.VT15001(vtrpc.Code_NOT_FOUND, fmt.Sprintf("tablet: %v is either down or nonexistent", alias))
}
return thc.Connection(ctx), nil
}
Expand Down
38 changes: 38 additions & 0 deletions go/vt/vterrors/code.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package vterrors

import (
"fmt"
"strings"

vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"
)
Expand Down Expand Up @@ -120,6 +121,8 @@ var (
VT14004 = errorWithoutState("VT14004", vtrpcpb.Code_UNAVAILABLE, "cannot find keyspace for: %s", "The specified keyspace could not be found.")
VT14005 = errorWithoutState("VT14005", vtrpcpb.Code_UNAVAILABLE, "cannot lookup sidecar database for keyspace: %s", "Failed to read sidecar database identifier.")

VT15001 = errorWithNoCode("VT15001", "session invalidated: close/reopen connection if applicable: %s", "This error means that the opened transaction should be closed by the application and re-opened.")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"session invalidated" may be misleading

  • if you are in a transaction, that needs to be closed, and a new transaction opened.
  • if you are not in a transaction, a retry is probably what you need to do.
    @harshit-gangal wdyt?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is the transaction which no longer exists. They can continue to use the same connection.
"rollback", followed by a retry is fine.
outside of transaction, retry is enough as Deepthi pointed.
All 3 error message should have different error code as they happen at different point in time of PRS/ERS


// Errors is a list of errors that must match all the variables
// defined above to enable auto-documentation of error codes.
Errors = []func(args ...any) *VitessError{
Expand Down Expand Up @@ -207,6 +210,10 @@ var (
VT14004,
VT14005,
}

ErrorsWithNoCode = []func(code vtrpcpb.Code, args ...any) *VitessError{
VT15001,
}
)

type VitessError struct {
Expand Down Expand Up @@ -258,3 +265,34 @@ func errorWithState(id string, code vtrpcpb.Code, state State, short, long strin
}
}
}

func errorWithNoCode(id string, short, long string) func(code vtrpcpb.Code, args ...any) *VitessError {
return func(code vtrpcpb.Code, args ...any) *VitessError {
s := short
if len(args) != 0 {
s = fmt.Sprintf(s, args...)
}

return &VitessError{
Err: New(code, id+": "+s),
Description: long,
ID: id,
}
}
}

func ErrorsHaveInvalidSession(errs []error) bool {
for _, err := range errs {
if IsInvalidSessionError(err) {
return true
}
}
return false
}

func IsInvalidSessionError(err error) bool {
if err == nil {
return false
}
return strings.Contains(err.Error(), VT15001(0).ID)
}
18 changes: 16 additions & 2 deletions go/vt/vterrors/vterrorsgen/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"text/template"

"vitess.io/vitess/go/mysql/sqlerror"
vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc"

"vitess.io/vitess/go/vt/vterrors"
)
Expand All @@ -31,10 +32,14 @@ const (
tmpl = `
| ID | Description | Error | MySQL Error Code | SQL State |
| --- | --- | --- | --- | --- |
{{- range $err := . }}
{{- range $err := .Errors }}
{{- $data := (call $err) }}
| {{ $data.ID }} | {{ $data.Description }} | {{ FormatError $data.Err }} | {{ ConvertStateToMySQLErrorCode $data.State }} | {{ ConvertStateToMySQLState $data.State }} |
{{- end }}
{{- range $err := .ErrorsWithNoCode }}
{{- $data := (call $err 0) }}
| {{ $data.ID }} | {{ $data.Description }} | {{ FormatError $data.Err }} | | {{ ConvertStateToMySQLState $data.State }} |
{{- end }}
`
)

Expand All @@ -53,7 +58,16 @@ func main() {
})
t = template.Must(t.Parse(tmpl))

err := t.ExecuteTemplate(os.Stdout, "template", vterrors.Errors)
type data struct {
Errors []func(args ...any) *vterrors.VitessError
ErrorsWithNoCode []func(code vtrpcpb.Code, args ...any) *vterrors.VitessError
}
d := data{
Errors: vterrors.Errors,
ErrorsWithNoCode: vterrors.ErrorsWithNoCode,
}

err := t.ExecuteTemplate(os.Stdout, "template", d)
if err != nil {
log.Fatal(err)
}
Expand Down
12 changes: 7 additions & 5 deletions go/vt/vtgate/executorcontext/vcursor_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -762,11 +762,12 @@ func (vc *VCursorImpl) Execute(ctx context.Context, method string, query string,

err := vc.markSavepoint(ctx, rollbackOnError, map[string]*querypb.BindVariable{})
if err != nil {
vc.setRollbackOnPartialExecIfRequired(vterrors.IsInvalidSessionError(err), true, rollbackOnError)
return nil, err
}

qr, err := vc.executor.Execute(ctx, nil, method, session, vc.marginComments.Leading+query+vc.marginComments.Trailing, bindVars)
vc.setRollbackOnPartialExecIfRequired(err != nil, rollbackOnError)
vc.setRollbackOnPartialExecIfRequired(vterrors.IsInvalidSessionError(err), err != nil, rollbackOnError)

return qr, err
}
Expand Down Expand Up @@ -794,11 +795,12 @@ func (vc *VCursorImpl) ExecuteMultiShard(ctx context.Context, primitive engine.P
atomic.AddUint64(&vc.logStats.ShardQueries, uint64(noOfShards))
err := vc.markSavepoint(ctx, rollbackOnError && (noOfShards > 1), map[string]*querypb.BindVariable{})
if err != nil {
vc.setRollbackOnPartialExecIfRequired(vterrors.IsInvalidSessionError(err), true, rollbackOnError)
return nil, []error{err}
}

qr, errs := vc.executor.ExecuteMultiShard(ctx, primitive, rss, commentedShardQueries(queries, vc.marginComments), vc.SafeSession, canAutocommit, vc.ignoreMaxMemoryRows, vc.observer, fetchLastInsertID)
vc.setRollbackOnPartialExecIfRequired(len(errs) != len(rss), rollbackOnError)
vc.setRollbackOnPartialExecIfRequired(vterrors.ErrorsHaveInvalidSession(errs), len(errs) != len(rss), rollbackOnError)
vc.logShardsQueried(primitive, len(rss))
if qr != nil && qr.InsertIDUpdated() {
vc.SafeSession.LastInsertId = qr.InsertID
Expand All @@ -818,7 +820,7 @@ func (vc *VCursorImpl) StreamExecuteMulti(ctx context.Context, primitive engine.
}

errs := vc.executor.StreamExecuteMulti(ctx, primitive, vc.marginComments.Leading+query+vc.marginComments.Trailing, rss, bindVars, vc.SafeSession, autocommit, callback, vc.observer, fetchLastInsertID)
vc.setRollbackOnPartialExecIfRequired(len(errs) != len(rss), rollbackOnError)
vc.setRollbackOnPartialExecIfRequired(vterrors.ErrorsHaveInvalidSession(errs), len(errs) != len(rss), rollbackOnError)

return errs
}
Expand Down Expand Up @@ -903,8 +905,8 @@ func (vc *VCursorImpl) AutocommitApproval() bool {
// when the query gets successfully executed on at least one shard,
// there does not exist any old savepoint for which rollback is already set
// and rollback on error is allowed.
func (vc *VCursorImpl) setRollbackOnPartialExecIfRequired(atleastOneSuccess bool, rollbackOnError bool) {
if atleastOneSuccess && rollbackOnError && !vc.SafeSession.IsRollbackSet() {
func (vc *VCursorImpl) setRollbackOnPartialExecIfRequired(required bool, atleastOneSuccess bool, rollbackOnError bool) {
if required || atleastOneSuccess && rollbackOnError && !vc.SafeSession.IsRollbackSet() {
vc.SafeSession.SetRollbackCommand()
}
}
Expand Down
13 changes: 12 additions & 1 deletion go/vt/vttablet/grpctabletconn/conn.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,15 @@ package grpctabletconn
import (
"context"
"io"
"strings"
"sync"

"github.com/spf13/pflag"
"google.golang.org/grpc"

"vitess.io/vitess/go/vt/proto/vtrpc"
"vitess.io/vitess/go/vt/vterrors"

"vitess.io/vitess/go/netutil"
"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/vt/callerid"
Expand Down Expand Up @@ -132,7 +136,14 @@ func (conn *gRPCQueryClient) Execute(ctx context.Context, target *querypb.Target
}
er, err := conn.c.Execute(ctx, req)
if err != nil {
return nil, tabletconn.ErrorFromGRPC(err)
err = tabletconn.ErrorFromGRPC(err)
if err == nil {
return nil, nil
}
if vterrors.Code(err) == vtrpc.Code_UNAVAILABLE && strings.Contains(err.Error(), "connection refused") {
return nil, vterrors.VT15001(vtrpc.Code_UNAVAILABLE, err.Error())
}
return nil, err
}
return sqltypes.Proto3ToResult(er.Result), nil
}
Expand Down
6 changes: 3 additions & 3 deletions go/vt/vttablet/tabletserver/state_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,15 +391,15 @@ func (sm *stateManager) StartRequest(ctx context.Context, target *querypb.Target

if sm.state != StateServing || !sm.replHealthy || sm.demotePrimaryStalled {
// This specific error string needs to be returned for vtgate buffering to work.
return vterrors.New(vtrpcpb.Code_CLUSTER_EVENT, vterrors.NotServing)
return vterrors.VT15001(vtrpcpb.Code_CLUSTER_EVENT, vterrors.NotServing)
}

shuttingDown := sm.wantState != StateServing
// If wait counter for the requests is not zero, then there are go-routines blocked on waiting for requests to be empty.
// We cannot allow adding to the requests to prevent any panics from happening.
if (shuttingDown && !allowOnShutdown) || sm.rw.GetWaiterCount() > 0 {
// This specific error string needs to be returned for vtgate buffering to work.
return vterrors.New(vtrpcpb.Code_CLUSTER_EVENT, vterrors.ShuttingDown)
return vterrors.VT15001(vtrpcpb.Code_CLUSTER_EVENT, vterrors.ShuttingDown)
}

err = sm.verifyTargetLocked(ctx, target)
Expand Down Expand Up @@ -436,7 +436,7 @@ func (sm *stateManager) verifyTargetLocked(ctx context.Context, target *querypb.
return nil
}
}
return vterrors.Errorf(vtrpcpb.Code_FAILED_PRECONDITION, "%s: %v, want: %v or %v", vterrors.WrongTablet, target.TabletType, sm.target.TabletType, sm.alsoAllow)
return vterrors.VT15001(vtrpcpb.Code_FAILED_PRECONDITION, fmt.Sprintf("%s: %v, want: %v or %v", vterrors.WrongTablet, target.TabletType, sm.target.TabletType, sm.alsoAllow))
}
} else {
if !tabletenv.IsLocalContext(ctx) {
Expand Down
Loading