Add --priority keeper option.

Sentinel will promote a keeper with a higher priority than the current one if
this is possible. In async mode this is a bit non-deterministic because we
always elect the node with the highest LSN, and under heavy load the
prioritized node might never report an LSN higher than its stronger
competitors. However, if the nodes are equal this should happen at some point.
In sync mode, we can just elect any of the synchronous standbys.

Implements sorintlab#492
arssher committed Mar 11, 2019
1 parent e776b3a commit a3b812a
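For orientation, the sketch below illustrates the selection rule described in the commit message: the candidate with the highest reported LSN wins, and keeper priority only breaks ties. This is a simplified, self-contained example; the `candidate` type and `pickNewMaster` helper are illustrative stand-ins (ordered best-first for readability), not the actual sentinel code shown in the diff further down.

```go
package main

import (
	"fmt"
	"sort"
)

// candidate is an illustrative stand-in for a standby DB together with the
// status reported by its keeper: XLogPos is the last reported LSN and
// Priority is the value passed to the keeper via --priority.
type candidate struct {
	UID      string
	XLogPos  uint64
	Priority int
}

// pickNewMaster orders candidates best-first: the highest LSN wins, and on
// equal LSNs the keeper with the higher priority wins. This mirrors the rule
// from the commit message; it is not stolon's actual implementation.
func pickNewMaster(cs []candidate) candidate {
	sort.Slice(cs, func(i, j int) bool {
		if cs[i].XLogPos != cs[j].XLogPos {
			return cs[i].XLogPos > cs[j].XLogPos
		}
		return cs[i].Priority > cs[j].Priority
	})
	return cs[0]
}

func main() {
	cs := []candidate{
		{UID: "db1", XLogPos: 100, Priority: 0},
		{UID: "db2", XLogPos: 100, Priority: 1}, // same LSN, higher priority
		{UID: "db3", XLogPos: 90, Priority: 2},  // higher priority but lagging
	}
	// db2 wins: priority only breaks ties, it never outweighs a higher LSN.
	fmt.Println(pickNewMaster(cs).UID) // db2
}
```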
Showing 8 changed files with 357 additions and 39 deletions.
3 changes: 3 additions & 0 deletions cmd/keeper/cmd/keeper.go
@@ -94,6 +94,7 @@ type config struct {
uid string
dataDir string
debug bool
priority int
pgListenAddress string
pgPort string
pgBinPath string
@@ -134,6 +135,7 @@ func init() {
CmdKeeper.PersistentFlags().StringVar(&cfg.pgSUPassword, "pg-su-password", "", "postgres superuser password. Only one of --pg-su-password or --pg-su-passwordfile must be provided. Must be the same for all keepers.")
CmdKeeper.PersistentFlags().StringVar(&cfg.pgSUPasswordFile, "pg-su-passwordfile", "", "postgres superuser password file. Only one of --pg-su-password or --pg-su-passwordfile must be provided. Must be the same for all keepers)")
CmdKeeper.PersistentFlags().BoolVar(&cfg.debug, "debug", false, "enable debug logging")
CmdKeeper.PersistentFlags().IntVar(&cfg.priority, "priority", 0, "keeper priority, integer. Stolon will promote an available keeper with higher priority than the current master, if this is possible. Default is 0.")

CmdKeeper.PersistentFlags().MarkDeprecated("id", "please use --uid")
CmdKeeper.PersistentFlags().MarkDeprecated("debug", "use --log-level=debug instead")
@@ -533,6 +535,7 @@ func (p *PostgresKeeper) updateKeeperInfo() error {
Maj: maj,
Min: min,
},
Priority: p.cfg.priority,
PostgresState: p.getLastPGState(),
}

108 changes: 71 additions & 37 deletions cmd/sentinel/cmd/sentinel.go
@@ -234,6 +234,7 @@ func (s *Sentinel) updateKeepersStatus(cd *cluster.ClusterData, keepersInfo clus
k.Status.BootUUID = ki.BootUUID
k.Status.PostgresBinaryVersion.Maj = ki.PostgresBinaryVersion.Maj
k.Status.PostgresBinaryVersion.Min = ki.PostgresBinaryVersion.Min
k.Status.Priority = ki.Priority
}
}

@@ -689,12 +690,17 @@ func (s *Sentinel) validStandbysByStatus(cd *cluster.ClusterData) (map[string]*c
return goodStandbys, failedStandbys, convergingStandbys
}

// dbSlice implements sort interface to sort by XLogPos
type dbSlice []*cluster.DB

func (p dbSlice) Len() int { return len(p) }
func (p dbSlice) Less(i, j int) bool { return p[i].Status.XLogPos < p[j].Status.XLogPos }
func (p dbSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
// sort dbs by XLogPos and keeper's priority
func sortDBs(cd *cluster.ClusterData, dbs []*cluster.DB) {
sort.Slice(dbs, func(i, j int) bool {
if dbs[i].Status.XLogPos != dbs[j].Status.XLogPos {
return dbs[i].Status.XLogPos < dbs[j].Status.XLogPos
}
pi := cd.Keepers[dbs[i].Spec.KeeperUID].Status.Priority
pj := cd.Keepers[dbs[j].Spec.KeeperUID].Status.Priority
return pi < pj
})
}

func (s *Sentinel) findBestStandbys(cd *cluster.ClusterData, masterDB *cluster.DB) []*cluster.DB {
goodStandbys, _, _ := s.validStandbysByStatus(cd)
@@ -716,7 +722,7 @@ func (s *Sentinel) findBestStandbys(cd *cluster.ClusterData, masterDB *cluster.D
bestDBs = append(bestDBs, db)
}
// Sort by XLogPos
sort.Sort(dbSlice(bestDBs))
sortDBs(cd, bestDBs)
return bestDBs
}

@@ -746,11 +752,56 @@ func (s *Sentinel) findBestNewMasters(cd *cluster.ClusterData, masterDB *cluster
bestNewMasters = append(bestNewMasters, db)
}
// Sort by XLogPos
sort.Sort(dbSlice(bestNewMasters))
sortDBs(cd, bestNewMasters)
log.Debugf("bestNewMasters: %s", spew.Sdump(bestNewMasters))
return bestNewMasters
}

// Return the DB that can become the new master. This function mostly takes care of
// sync mode; in the async case, the new master is just the first element of findBestNewMasters.
func (s *Sentinel) findBestNewMaster(cd *cluster.ClusterData, curMasterDB *cluster.DB, logErrors bool) *cluster.DB {
bestNewMasters := s.findBestNewMasters(cd, curMasterDB)
if len(bestNewMasters) == 0 {
if logErrors {
log.Errorw("no eligible masters")
}
return nil
}

// if synchronous replication is enabled, only choose new master in the synchronous replication standbys.
var bestNewMasterDB *cluster.DB = nil
if curMasterDB.Spec.SynchronousReplication == true {
commonSyncStandbys := util.CommonElements(curMasterDB.Status.SynchronousStandbys, curMasterDB.Spec.SynchronousStandbys)
if len(commonSyncStandbys) == 0 {
if logErrors {
log.Errorw("cannot choose synchronous standby since there are no common elements between the latest master reported synchronous standbys and the db spec ones", "reported", curMasterDB.Status.SynchronousStandbys, "spec", curMasterDB.Spec.SynchronousStandbys)
}
return nil
}
// In synchronous mode there is no need to choose the DB with the
// highest LSN; all found dbs must be in sync, so pick the one
// with the highest priority.
var newMasterPriority int
for _, nm := range bestNewMasters {
if util.StringInSlice(commonSyncStandbys, nm.UID) {
nmPriority := cd.Keepers[nm.Spec.KeeperUID].Status.Priority
if (bestNewMasterDB == nil) || (nmPriority > newMasterPriority) {
bestNewMasterDB = nm
newMasterPriority = nmPriority
}
}
}
if bestNewMasterDB == nil {
if logErrors {
log.Errorw("cannot choose synchronous standby since there's not match between the possible masters and the usable synchronousStandbys", "reported", curMasterDB.Status.SynchronousStandbys, "spec", curMasterDB.Spec.SynchronousStandbys, "common", commonSyncStandbys, "possibleMasters", bestNewMasters)
}
}
} else {
bestNewMasterDB = bestNewMasters[0]
}
return bestNewMasterDB
}

func (s *Sentinel) updateCluster(cd *cluster.ClusterData, pis cluster.ProxiesInfo) (*cluster.ClusterData, error) {
// take a cd deepCopy to check that the code isn't changing it (it'll be a bug)
origcd := cd.DeepCopy()
@@ -983,37 +1034,20 @@ func (s *Sentinel) updateCluster(cd *cluster.ClusterData, pis cluster.ProxiesInf
masterOK = false
}

if !masterOK {
log.Infow("trying to find a new master to replace failed master")
bestNewMasters := s.findBestNewMasters(newcd, curMasterDB)
if len(bestNewMasters) == 0 {
log.Errorw("no eligible masters")
bestNewMasterDB := s.findBestNewMaster(newcd, curMasterDB, !masterOK)
if bestNewMasterDB != nil {
if !masterOK {
log.Infow("electing db as the new master", "db", bestNewMasterDB.UID, "keeper", bestNewMasterDB.Spec.KeeperUID)
wantedMasterDBUID = bestNewMasterDB.UID
} else {
// if synchronous replication is enabled, only choose new master in the synchronous replication standbys.
var bestNewMasterDB *cluster.DB
if curMasterDB.Spec.SynchronousReplication == true {
commonSyncStandbys := util.CommonElements(curMasterDB.Status.SynchronousStandbys, curMasterDB.Spec.SynchronousStandbys)
if len(commonSyncStandbys) == 0 {
log.Warnw("cannot choose synchronous standby since there are no common elements between the latest master reported synchronous standbys and the db spec ones", "reported", curMasterDB.Status.SynchronousStandbys, "spec", curMasterDB.Spec.SynchronousStandbys)
} else {
for _, nm := range bestNewMasters {
if util.StringInSlice(commonSyncStandbys, nm.UID) {
bestNewMasterDB = nm
break
}
}
if bestNewMasterDB == nil {
log.Warnw("cannot choose synchronous standby since there's not match between the possible masters and the usable synchronousStandbys", "reported", curMasterDB.Status.SynchronousStandbys, "spec", curMasterDB.Spec.SynchronousStandbys, "common", commonSyncStandbys, "possibleMasters", bestNewMasters)
}
}
} else {
bestNewMasterDB = bestNewMasters[0]
}
if bestNewMasterDB != nil {
log.Infow("electing db as the new master", "db", bestNewMasterDB.UID, "keeper", bestNewMasterDB.Spec.KeeperUID)
// Even if the current master is ok, we probably still
// want to change it if there is a ready DB with higher
// keeper priority.
curMasterPriority := cd.Keepers[curMasterDB.Spec.KeeperUID].Status.Priority
newMasterPriority := cd.Keepers[bestNewMasterDB.Spec.KeeperUID].Status.Priority
if newMasterPriority > curMasterPriority {
log.Infow("electing db as the new master because it has higher priority", "db", bestNewMasterDB.UID, "keeper", bestNewMasterDB.Spec.KeeperUID, "currPriority", curMasterPriority, "newPriority", newMasterPriority)
wantedMasterDBUID = bestNewMasterDB.UID
} else {
log.Errorw("no eligible masters")
}
}
}
184 changes: 184 additions & 0 deletions cmd/sentinel/cmd/sentinel_test.go
@@ -4960,6 +4960,190 @@ func TestUpdateCluster(t *testing.T) {
},
},
},
// #26 Test keeper's priority. One master and one healthy
// standby. Master is ok, but standby has higher priority and
// gets elected.
{
cd: &cluster.ClusterData{
Cluster: &cluster.Cluster{
UID: "cluster1",
Generation: 1,
Spec: &cluster.ClusterSpec{
ConvergenceTimeout: &cluster.Duration{Duration: cluster.DefaultConvergenceTimeout},
InitTimeout: &cluster.Duration{Duration: cluster.DefaultInitTimeout},
SyncTimeout: &cluster.Duration{Duration: cluster.DefaultSyncTimeout},
MaxStandbysPerSender: cluster.Uint16P(cluster.DefaultMaxStandbysPerSender),
},
Status: cluster.ClusterStatus{
CurrentGeneration: 1,
Phase: cluster.ClusterPhaseNormal,
Master: "db1",
},
},
Keepers: cluster.Keepers{
"keeper1": &cluster.Keeper{
UID: "keeper1",
Spec: &cluster.KeeperSpec{},
Status: cluster.KeeperStatus{
Healthy: true,
LastHealthyTime: now,
},
},
"keeper2": &cluster.Keeper{
UID: "keeper2",
Spec: &cluster.KeeperSpec{},
Status: cluster.KeeperStatus{
Healthy: true,
LastHealthyTime: now,
Priority: 1,
},
},
},
DBs: cluster.DBs{
"db1": &cluster.DB{
UID: "db1",
Generation: 1,
ChangeTime: time.Time{},
Spec: &cluster.DBSpec{
KeeperUID: "keeper1",
RequestTimeout: cluster.Duration{Duration: cluster.DefaultRequestTimeout},
MaxStandbys: cluster.DefaultMaxStandbys,
AdditionalWalSenders: cluster.DefaultAdditionalWalSenders,
InitMode: cluster.DBInitModeNone,
SynchronousReplication: false,
Role: common.RoleMaster,
Followers: []string{"db2"},
SynchronousStandbys: nil,
ExternalSynchronousStandbys: nil,
},
Status: cluster.DBStatus{
Healthy: true,
CurrentGeneration: 1,
},
},
"db2": &cluster.DB{
UID: "db2",
Generation: 1,
ChangeTime: time.Time{},
Spec: &cluster.DBSpec{
KeeperUID: "keeper2",
RequestTimeout: cluster.Duration{Duration: cluster.DefaultRequestTimeout},
MaxStandbys: cluster.DefaultMaxStandbys,
AdditionalWalSenders: cluster.DefaultAdditionalWalSenders,
InitMode: cluster.DBInitModeNone,
SynchronousReplication: false,
Role: common.RoleStandby,
Followers: []string{},
FollowConfig: &cluster.FollowConfig{
Type: cluster.FollowTypeInternal,
DBUID: "db1",
},
SynchronousStandbys: nil,
ExternalSynchronousStandbys: nil,
},
Status: cluster.DBStatus{
Healthy: true,
CurrentGeneration: 1,
},
},
},
Proxy: &cluster.Proxy{
Generation: 1,
Spec: cluster.ProxySpec{
MasterDBUID: "db1",
EnabledProxies: []string{},
},
},
},
outcd: &cluster.ClusterData{
Cluster: &cluster.Cluster{
UID: "cluster1",
Generation: 1,
Spec: &cluster.ClusterSpec{
ConvergenceTimeout: &cluster.Duration{Duration: cluster.DefaultConvergenceTimeout},
InitTimeout: &cluster.Duration{Duration: cluster.DefaultInitTimeout},
SyncTimeout: &cluster.Duration{Duration: cluster.DefaultSyncTimeout},
MaxStandbysPerSender: cluster.Uint16P(cluster.DefaultMaxStandbysPerSender),
},
Status: cluster.ClusterStatus{
CurrentGeneration: 1,
Phase: cluster.ClusterPhaseNormal,
Master: "db2",
},
},
Keepers: cluster.Keepers{
"keeper1": &cluster.Keeper{
UID: "keeper1",
Spec: &cluster.KeeperSpec{},
Status: cluster.KeeperStatus{
Healthy: true,
LastHealthyTime: now,
},
},
"keeper2": &cluster.Keeper{
UID: "keeper2",
Spec: &cluster.KeeperSpec{},
Status: cluster.KeeperStatus{
Healthy: true,
LastHealthyTime: now,
Priority: 1,
},
},
},
DBs: cluster.DBs{
"db1": &cluster.DB{
UID: "db1",
Generation: 2,
ChangeTime: time.Time{},
Spec: &cluster.DBSpec{
KeeperUID: "keeper1",
RequestTimeout: cluster.Duration{Duration: cluster.DefaultRequestTimeout},
MaxStandbys: cluster.DefaultMaxStandbys,
AdditionalWalSenders: cluster.DefaultAdditionalWalSenders,
InitMode: cluster.DBInitModeNone,
SynchronousReplication: false,
Role: common.RoleMaster,
Followers: []string{},
SynchronousStandbys: nil,
ExternalSynchronousStandbys: nil,
},
Status: cluster.DBStatus{
Healthy: true,
CurrentGeneration: 1,
},
},
"db2": &cluster.DB{
UID: "db2",
Generation: 2,
ChangeTime: time.Time{},
Spec: &cluster.DBSpec{
KeeperUID: "keeper2",
RequestTimeout: cluster.Duration{Duration: cluster.DefaultRequestTimeout},
MaxStandbys: cluster.DefaultMaxStandbys,
AdditionalWalSenders: cluster.DefaultAdditionalWalSenders,
InitMode: cluster.DBInitModeNone,
SynchronousReplication: false,
Role: common.RoleMaster,
Followers: []string{},
FollowConfig: nil,
SynchronousStandbys: nil,
ExternalSynchronousStandbys: nil,
},
Status: cluster.DBStatus{
Healthy: true,
CurrentGeneration: 1,
},
},
},
Proxy: &cluster.Proxy{
Generation: 2,
Spec: cluster.ProxySpec{
MasterDBUID: "",
EnabledProxies: []string{},
},
},
},
},
}

for i, tt := range tests {
Expand Down
3 changes: 2 additions & 1 deletion doc/commands/stolon-keeper.md
@@ -31,6 +31,7 @@ stolon-keeper [flags]
--pg-su-password string postgres superuser password. Only one of --pg-su-password or --pg-su-passwordfile must be provided. Must be the same for all keepers.
--pg-su-passwordfile string postgres superuser password file. Only one of --pg-su-password or --pg-su-passwordfile must be provided. Must be the same for all keepers)
--pg-su-username string postgres superuser user name. Used for keeper managed instance access and pg_rewind based synchronization. It'll be created on db initialization. Defaults to the name of the effective user running stolon-keeper. Must be the same for all keepers. (default "motaboy")
--priority int keeper priority, integer. Stolon will promote an available keeper with higher priority than the current master, if this is possible. Default is 0.
--store-backend string store backend type (etcdv2/etcd, etcdv3, consul or kubernetes)
--store-ca-file string verify certificates of HTTPS-enabled store servers using this CA bundle
--store-cert-file string certificate file for client identification to the store
@@ -41,4 +42,4 @@ stolon-keeper [flags]
--uid string keeper uid (must be unique in the cluster and can contain only lower-case letters, numbers and the underscore character). If not provided a random uid will be generated.
```

###### Auto generated by spf13/cobra on 21-Aug-2018
###### Auto generated by spf13/cobra on 11-Mar-2019
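As a usage note (an illustration, not part of this commit's diff): since --priority defaults to 0, running a single keeper with, say, `--priority 1` while the others keep the default makes the sentinel prefer that keeper for promotion whenever it is an eligible candidate, as described in the commit message above.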
3 changes: 3 additions & 0 deletions internal/cluster/cluster.go
@@ -552,6 +552,8 @@ type KeeperStatus struct {
PostgresBinaryVersion PostgresBinaryVersion `json:"postgresBinaryVersion,omitempty"`

ForceFail bool `json:"forceFail,omitempty"`

Priority int `json:"priority,omitempty"`
}

type Keeper struct {
@@ -575,6 +577,7 @@ func NewKeeperFromKeeperInfo(ki *KeeperInfo) *Keeper {
Healthy: true,
LastHealthyTime: time.Now(),
BootUUID: ki.BootUUID,
Priority: ki.Priority,
},
}
}