Add --priority keeper option.
Sentinel will promote a keeper with a higher priority than the current one if
this is possible. In async mode this is somewhat non-deterministic because we
always elect the node with the highest LSN, and under heavy load the
prioritized node might never report an LSN higher than its stronger
competitors. However, if the nodes are equal, this should happen at some
point. In sync mode, we can simply elect any of the synchronous standbys.
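
As a rough illustration of the election rule described above (a standalone
sketch with hypothetical, simplified types; the real logic is in
cmd/sentinel/cmd/sentinel.go in this diff), the candidate with the highest
reported LSN wins and keeper priority only breaks ties:

package main

import "fmt"

// Hypothetical, simplified stand-in for what the sentinel knows about each
// eligible standby: last reported LSN (XLogPos) and its keeper's priority.
type candidate struct {
	uid      string
	lsn      uint64
	priority int
}

// pickNewMaster sketches the rule above: the highest LSN wins, keeper
// priority only breaks ties. Returns false if there are no candidates.
func pickNewMaster(cands []candidate) (candidate, bool) {
	if len(cands) == 0 {
		return candidate{}, false
	}
	best := cands[0]
	for _, c := range cands[1:] {
		if c.lsn > best.lsn || (c.lsn == best.lsn && c.priority > best.priority) {
			best = c
		}
	}
	return best, true
}

func main() {
	cands := []candidate{
		{uid: "keeper0", lsn: 0x3000, priority: 0},
		{uid: "keeper1", lsn: 0x3000, priority: 10}, // equal LSN, higher priority
	}
	if best, ok := pickNewMaster(cands); ok {
		fmt.Println("would elect:", best.uid) // keeper1
	}
}

Under heavy load keeper1 could stay behind keeper0 on LSN and never be
elected; once their LSNs are equal, the priority tie-break applies.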

Priority can be set during keeper start (--priority) or later with the new
command 'stolonctl setkeeperpriority'. The latter allows updating the priority
without restarting the keeper (and its Postgres instance), which can be used
for controlled failover.
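
A minimal sketch of the start-time path (trimmed, hypothetical stand-in types;
the actual rule is the updateKeepersStatus hunk in cmd/sentinel/cmd/sentinel.go
below): the keeper reports a priority only when --priority was passed on this
invocation, and the sentinel applies it only when it also sees a new boot UUID,
i.e. after a keeper restart; setting it via stolonctl instead updates the
stored value directly, so no restart is needed.

package main

import "fmt"

// Hypothetical, trimmed-down stand-ins for the cluster data structures.
type keeperSpec struct{ Priority int }
type keeperStatus struct{ BootUUID string }
type keeper struct {
	Spec   keeperSpec
	Status keeperStatus
}
type keeperInfo struct {
	BootUUID string
	Priority *int // nil when --priority was not passed on this invocation
}

// applyReportedPriority mirrors the rule added to updateKeepersStatus: a
// reported priority overrides the stored one only when the keeper has
// restarted (new boot UUID) with --priority set explicitly.
func applyReportedPriority(k *keeper, ki keeperInfo) {
	if ki.Priority != nil && k.Status.BootUUID != ki.BootUUID {
		k.Spec.Priority = *ki.Priority
	}
	k.Status.BootUUID = ki.BootUUID
}

func main() {
	k := &keeper{Status: keeperStatus{BootUUID: "boot-1"}}

	// Same boot, no flag: the stored priority (possibly set via stolonctl) is kept.
	applyReportedPriority(k, keeperInfo{BootUUID: "boot-1"})

	// Restart with --priority 5: the stored priority is replaced.
	p := 5
	applyReportedPriority(k, keeperInfo{BootUUID: "boot-2", Priority: &p})
	fmt.Println(k.Spec.Priority) // 5
}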

Implements sorintlab#492
arssher committed Jun 30, 2019
1 parent 81425db commit a7eab73
Showing 9 changed files with 472 additions and 41 deletions.
16 changes: 16 additions & 0 deletions cmd/keeper/cmd/keeper.go
@@ -99,6 +99,8 @@ type config struct {
uid string
dataDir string
debug bool
priority int
prioritySpecified bool // true iff explicitly set by user
pgListenAddress string
pgAdvertiseAddress string
pgPort string
@@ -143,6 +145,7 @@ func init() {
CmdKeeper.PersistentFlags().StringVar(&cfg.pgSUPassword, "pg-su-password", "", "postgres superuser password. Only one of --pg-su-password or --pg-su-passwordfile must be provided. Must be the same for all keepers.")
CmdKeeper.PersistentFlags().StringVar(&cfg.pgSUPasswordFile, "pg-su-passwordfile", "", "postgres superuser password file. Only one of --pg-su-password or --pg-su-passwordfile must be provided. Must be the same for all keepers)")
CmdKeeper.PersistentFlags().BoolVar(&cfg.debug, "debug", false, "enable debug logging")
CmdKeeper.PersistentFlags().IntVar(&cfg.priority, "priority", 0, "keeper priority, integer. Stolon will promote available keeper with higher priority than current master, if this is possible. Healthy keeper with higher priority will be elected even if current master is online. If not specified, priority is set to 0 on first keeper invocation; on subsequent invocations, last value (which could be also set with 'stolonctl setkeeperpriority') is reused.")

CmdKeeper.PersistentFlags().MarkDeprecated("id", "please use --uid")
CmdKeeper.PersistentFlags().MarkDeprecated("debug", "use --log-level=debug instead")
@@ -440,6 +443,8 @@ type PostgresKeeper struct {
pgSUPassword string
pgInitialSUUsername string

priority *int // nil means not specified

sleepInterval time.Duration
requestTimeout time.Duration

@@ -470,6 +475,10 @@ func NewPostgresKeeper(cfg *config, end chan error) (*PostgresKeeper, error) {
return nil, fmt.Errorf("cannot get absolute datadir path for %q: %v", cfg.dataDir, err)
}

var priority *int = nil
if cfg.prioritySpecified {
priority = &cfg.priority
}
p := &PostgresKeeper{
cfg: cfg,

@@ -490,6 +499,8 @@ func NewPostgresKeeper(cfg *config, end chan error) (*PostgresKeeper, error) {
pgSUPassword: cfg.pgSUPassword,
pgInitialSUUsername: cfg.pgInitialSUUsername,

priority: priority,

sleepInterval: cluster.DefaultSleepInterval,
requestTimeout: cluster.DefaultRequestTimeout,

@@ -562,6 +573,7 @@ func (p *PostgresKeeper) updateKeeperInfo() error {
Maj: maj,
Min: min,
},
Priority: p.priority,
PostgresState: p.getLastPGState(),
}

@@ -1982,6 +1994,10 @@ func keeper(c *cobra.Command, args []string) {
}
}

// if --priority wasn't specified explicitly, the last value is reused, so
// remember it
cfg.prioritySpecified = c.Flags().Changed("priority")

// Open (and create if needed) the lock file.
// There is no need to clean up this file since we don't use the file as an actual lock. We get a lock
// on the file. So the lock get released when our process stops (or log.Fatalfs).
110 changes: 73 additions & 37 deletions cmd/sentinel/cmd/sentinel.go
@@ -231,6 +231,11 @@ func (s *Sentinel) updateKeepersStatus(cd *cluster.ClusterData, keepersInfo clus
} else {
s.CleanKeeperError(keeperUID)
// Update keeper status infos
// If keeper restarted with specified priority, update it
if ki.Priority != nil &&
k.Status.BootUUID != ki.BootUUID {
k.Spec.Priority = *ki.Priority
}
k.Status.BootUUID = ki.BootUUID
k.Status.PostgresBinaryVersion.Maj = ki.PostgresBinaryVersion.Maj
k.Status.PostgresBinaryVersion.Min = ki.PostgresBinaryVersion.Min
@@ -687,12 +692,17 @@ func (s *Sentinel) validStandbysByStatus(cd *cluster.ClusterData) (map[string]*c
return goodStandbys, failedStandbys, convergingStandbys
}

// dbSlice implements sort interface to sort by XLogPos
type dbSlice []*cluster.DB

func (p dbSlice) Len() int { return len(p) }
func (p dbSlice) Less(i, j int) bool { return p[i].Status.XLogPos < p[j].Status.XLogPos }
func (p dbSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
// sort dbs by XLogPos and keeper's priority
func sortDBs(cd *cluster.ClusterData, dbs []*cluster.DB) {
sort.Slice(dbs, func(i, j int) bool {
if dbs[i].Status.XLogPos != dbs[j].Status.XLogPos {
return dbs[i].Status.XLogPos < dbs[j].Status.XLogPos
}
pi := cd.Keepers[dbs[i].Spec.KeeperUID].Spec.Priority
pj := cd.Keepers[dbs[j].Spec.KeeperUID].Spec.Priority
return pi < pj
})
}

func (s *Sentinel) findBestStandbys(cd *cluster.ClusterData, masterDB *cluster.DB) []*cluster.DB {
goodStandbys, _, _ := s.validStandbysByStatus(cd)
@@ -714,7 +724,7 @@ func (s *Sentinel) findBestStandbys(cd *cluster.ClusterData, masterDB *cluster.D
bestDBs = append(bestDBs, db)
}
// Sort by XLogPos
sort.Sort(dbSlice(bestDBs))
sortDBs(cd, bestDBs)
return bestDBs
}

@@ -744,11 +754,54 @@ func (s *Sentinel) findBestNewMasters(cd *cluster.ClusterData, masterDB *cluster
bestNewMasters = append(bestNewMasters, db)
}
// Sort by XLogPos
sort.Sort(dbSlice(bestNewMasters))
sortDBs(cd, bestNewMasters)
log.Debugf("bestNewMasters: %s", spew.Sdump(bestNewMasters))
return bestNewMasters
}

// Return the DB that can be the new master. This function mostly takes care of
// sync mode; in the async case, the new master is just the first element of findBestNewMasters.
func (s *Sentinel) findBestNewMaster(cd *cluster.ClusterData, curMasterDB *cluster.DB, logErrors bool) *cluster.DB {
bestNewMasters := s.findBestNewMasters(cd, curMasterDB)
if len(bestNewMasters) == 0 {
if logErrors {
log.Errorw("no eligible masters")
}
return nil
}

// if synchronous replication is enabled, only choose new master in the synchronous replication standbys.
var bestNewMasterDB *cluster.DB = nil
if curMasterDB.Spec.SynchronousReplication == true {
commonSyncStandbys := util.CommonElements(curMasterDB.Status.SynchronousStandbys, curMasterDB.Spec.SynchronousStandbys)
if len(commonSyncStandbys) == 0 {
if logErrors {
log.Warnw("cannot choose synchronous standby since there are no common elements between the latest master reported synchronous standbys and the db spec ones", "reported", curMasterDB.Status.SynchronousStandbys, "spec", curMasterDB.Spec.SynchronousStandbys)
}
return nil
}
// In synchronous mode there is no need to choose DB with
// highest LSN; all found dbs must be in sync, so pick the one
// with highest priority.
var newMasterPriority int
for _, nm := range bestNewMasters {
if util.StringInSlice(commonSyncStandbys, nm.UID) {
nmPriority := cd.Keepers[nm.Spec.KeeperUID].Spec.Priority
if (bestNewMasterDB == nil) || (nmPriority > newMasterPriority) {
bestNewMasterDB = nm
newMasterPriority = nmPriority
}
}
}
if bestNewMasterDB == nil && logErrors {
log.Warnw("cannot choose synchronous standby since there's not match between the possible masters and the usable synchronousStandbys", "reported", curMasterDB.Status.SynchronousStandbys, "spec", curMasterDB.Spec.SynchronousStandbys, "common", commonSyncStandbys, "possibleMasters", bestNewMasters)
}
} else {
bestNewMasterDB = bestNewMasters[0]
}
return bestNewMasterDB
}

func (s *Sentinel) updateCluster(cd *cluster.ClusterData, pis cluster.ProxiesInfo) (*cluster.ClusterData, error) {
// take a cd deepCopy to check that the code isn't changing it (it'll be a bug)
origcd := cd.DeepCopy()
@@ -981,37 +1034,20 @@ func (s *Sentinel) updateCluster(cd *cluster.ClusterData, pis cluster.ProxiesInf
masterOK = false
}

if !masterOK {
log.Infow("trying to find a new master to replace failed master")
bestNewMasters := s.findBestNewMasters(newcd, curMasterDB)
if len(bestNewMasters) == 0 {
log.Errorw("no eligible masters")
bestNewMasterDB := s.findBestNewMaster(newcd, curMasterDB, !masterOK)
if bestNewMasterDB != nil {
if !masterOK {
log.Infow("electing db as the new master", "db", bestNewMasterDB.UID, "keeper", bestNewMasterDB.Spec.KeeperUID)
wantedMasterDBUID = bestNewMasterDB.UID
} else {
// if synchronous replication is enabled, only choose new master in the synchronous replication standbys.
var bestNewMasterDB *cluster.DB
if curMasterDB.Spec.SynchronousReplication == true {
commonSyncStandbys := util.CommonElements(curMasterDB.Status.SynchronousStandbys, curMasterDB.Spec.SynchronousStandbys)
if len(commonSyncStandbys) == 0 {
log.Warnw("cannot choose synchronous standby since there are no common elements between the latest master reported synchronous standbys and the db spec ones", "reported", curMasterDB.Status.SynchronousStandbys, "spec", curMasterDB.Spec.SynchronousStandbys)
} else {
for _, nm := range bestNewMasters {
if util.StringInSlice(commonSyncStandbys, nm.UID) {
bestNewMasterDB = nm
break
}
}
if bestNewMasterDB == nil {
log.Warnw("cannot choose synchronous standby since there's not match between the possible masters and the usable synchronousStandbys", "reported", curMasterDB.Status.SynchronousStandbys, "spec", curMasterDB.Spec.SynchronousStandbys, "common", commonSyncStandbys, "possibleMasters", bestNewMasters)
}
}
} else {
bestNewMasterDB = bestNewMasters[0]
}
if bestNewMasterDB != nil {
log.Infow("electing db as the new master", "db", bestNewMasterDB.UID, "keeper", bestNewMasterDB.Spec.KeeperUID)
// Even if current master is ok, we probably still
// want to change it if there is ready DB with higher
// keeper priority.
curMasterPriority := cd.Keepers[curMasterDB.Spec.KeeperUID].Spec.Priority
newMasterPriority := cd.Keepers[bestNewMasterDB.Spec.KeeperUID].Spec.Priority
if newMasterPriority > curMasterPriority {
log.Infow("electing db as the new master because it has higher priority", "db", bestNewMasterDB.UID, "keeper", bestNewMasterDB.Spec.KeeperUID, "currPriority", curMasterPriority, "newPriority", newMasterPriority)
wantedMasterDBUID = bestNewMasterDB.UID
} else {
log.Errorw("no eligible masters")
}
}
}