-
Notifications
You must be signed in to change notification settings - Fork 50
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Wire up PartitionRouting into RemoteScannerManager #2172
Changes from all commits
6657938
a8c8fc4
0400532
9782dbe
3f44fc3
4151ea5
8ef20fc
971b3d7
5621d20
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,8 +21,8 @@ use restate_types::partition_table::{FindPartition, PartitionTable, PartitionTab | |
|
||
use crate::network::rpc_router::{ConnectionAwareRpcError, ConnectionAwareRpcRouter, RpcError}; | ||
use crate::network::{HasConnection, Networking, Outgoing, TransportConnect}; | ||
use crate::routing_info::PartitionRouting; | ||
use crate::{metadata, ShutdownError}; | ||
use crate::partitions::PartitionRouting; | ||
use crate::ShutdownError; | ||
|
||
#[derive(Debug, thiserror::Error)] | ||
pub enum PartitionProcessorRpcClientError { | ||
|
@@ -319,12 +319,11 @@ where | |
.pinned() | ||
.find_partition_id(inner_request.partition_key())?; | ||
|
||
// TODO enable it once https://github.com/restatedev/restate/pull/2172 goes in | ||
// let node_id = self | ||
// .partition_routing | ||
// .get_node_by_partition(partition_id) | ||
// .ok_or(PartitionProcessorRpcClientError::UnknownNode(partition_id))?; | ||
let node_id = metadata().my_node_id(); | ||
let node_id = self | ||
.partition_routing | ||
.get_node_by_partition(partition_id) | ||
.ok_or(PartitionProcessorRpcClientError::UnknownNode(partition_id))?; | ||
Comment on lines
+322
to
+325
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How are we gonna refresh the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if we shouldn't just inline this responsibility into the My worry is that by blocking assorted requests throughout the system for up to 3s, we could massively increase concurrency. This is why I didn't want to add a blocking version that waits for a refresh. Thoughts? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't like the idea of pushing every potential caller into having to remember to manually call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For the case where we don't have information for a given Regarding this very specific comment, it was more about who's calling There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think in time, this should probably become async, and we can introduce a retry policy - or a retrying wrapper around the lookup, so we can control retries by call site. For now, I've added an automatic refresh request whenever we return |
||
|
||
let response = self | ||
.rpc_router | ||
.call( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ | |
// by the Apache License, Version 2.0. | ||
|
||
use std::collections::HashMap; | ||
use std::fmt::{Debug, Formatter}; | ||
use std::pin::pin; | ||
use std::sync::Arc; | ||
|
||
|
@@ -37,9 +38,8 @@ pub enum Command { | |
SyncRoutingInformation, | ||
} | ||
|
||
/// Holds a view of the known partition-to-node mappings. Use it to discover which node(s) to route | ||
/// requests to for a given partition. Compared to the partition table, this view is more dynamic as | ||
/// it changes based on cluster nodes' operational status. This handle can be cheaply cloned. | ||
/// Discover cluster nodes for a given partition. Compared to the partition table, this view is more | ||
/// dynamic as it changes based on cluster nodes' operational status. Can be cheaply cloned. | ||
#[derive(Clone)] | ||
pub struct PartitionRouting { | ||
sender: CommandSender, | ||
|
@@ -48,28 +48,61 @@ pub struct PartitionRouting { | |
} | ||
|
||
impl PartitionRouting { | ||
/// Look up a suitable node to answer requests for the given partition. | ||
/// Look up a suitable node to process requests for a given partition. Answers are authoritative | ||
/// though subject to propagation delays through the cluster in distributed deployments. | ||
/// Generally, as a consumer of routing information, your options are limited to backing off and | ||
/// retrying the request, or returning an error upstream when information is not available. | ||
/// | ||
/// A `None` response indicates that either we have no knowledge about this partition, or that | ||
/// the routing table has not yet been refreshed for the cluster. The latter condition should be | ||
/// brief and only on startup, so we can generally treat lack of response as a negative answer. | ||
/// An automatic refresh is scheduled any time a `None` response is returned. | ||
pub fn get_node_by_partition(&self, partition_id: PartitionId) -> Option<NodeId> { | ||
self.partition_to_node_mappings | ||
.load() | ||
.inner | ||
.get(&partition_id) | ||
.copied() | ||
let mappings = self.partition_to_node_mappings.load(); | ||
|
||
// This check should ideally be strengthened to make sure we're using reasonably fresh lookup data | ||
if mappings.version < Version::MIN { | ||
debug!("Partition routing information not available - requesting refresh"); | ||
self.request_refresh(); | ||
return None; | ||
} | ||
|
||
let maybe_node = mappings.inner.get(&partition_id).cloned(); | ||
if maybe_node.is_none() { | ||
debug!( | ||
?partition_id, | ||
"No known node for partition - requesting refresh" | ||
); | ||
self.request_refresh(); | ||
} | ||
Comment on lines
+71
to
+77
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: Practically, this change will probably have no visible effect right now since the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The earlier conditional will kick in before any load has happened, i.e. version = |
||
maybe_node | ||
} | ||
|
||
/// Call this to hint to the background refresher that its view may be outdated. This is useful | ||
/// when a caller discovers via some other mechanism that routing information may be invalid - | ||
/// for example, when a request to a node previously returned by `get_node_by_partition` fails | ||
/// with a response that explicitly indicates that it is no longer serving that partition. The | ||
/// call returns as soon as the request is enqueued. A refresh is not guaranteed to happen. | ||
pub async fn request_refresh(&self) { | ||
self.sender | ||
.send(Command::SyncRoutingInformation) | ||
.await | ||
.expect("Failed to send refresh request"); | ||
/// Provide a hint that the partition-to-nodes view may be outdated. This is useful when a | ||
/// caller discovers via some other mechanism that routing information may be invalid - for | ||
/// example, when a request to a node previously returned by | ||
/// [`PartitionRouting::get_node_by_partition`] indicates that it is no longer serving that | ||
/// partition. | ||
/// | ||
/// This call returns immediately, while the refresh itself is performed asynchronously on a | ||
/// best-effort basis. Multiple calls will not result in multiple refresh attempts. You only | ||
/// need to call this method if you get a node id, and later discover it's incorrect; a `None` | ||
/// response to a lookup triggers a refresh automatically. | ||
pub fn request_refresh(&self) { | ||
// if the channel already contains an unconsumed message, it doesn't matter that we can't send another | ||
let _ = self.sender.try_send(Command::SyncRoutingInformation).ok(); | ||
} | ||
} | ||
|
||
impl Debug for PartitionRouting { | ||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { | ||
f.write_str("PartitionNodeResolver(")?; | ||
self.partition_to_node_mappings.load().fmt(f)?; | ||
f.write_str(")") | ||
} | ||
} | ||
|
||
#[derive(Debug)] | ||
struct PartitionToNodesRoutingTable { | ||
version: Version, | ||
/// A mapping of partition IDs to node IDs that are believed to be authoritative for that | ||
|
@@ -158,7 +191,7 @@ impl PartitionRoutingRefresher { | |
let metadata_store_client = self.metadata_store_client.clone(); | ||
|
||
let task = task_center().spawn_unmanaged( | ||
crate::TaskKind::Disposable, | ||
TaskKind::Disposable, | ||
"refresh-routing-information", | ||
None, | ||
{ | ||
|
@@ -228,3 +261,36 @@ async fn sync_routing_information( | |
}), | ||
); | ||
} | ||
|
||
#[cfg(any(test, feature = "test-util"))] | ||
pub mod mocks { | ||
use std::collections::HashMap; | ||
use std::sync::Arc; | ||
|
||
use arc_swap::ArcSwap; | ||
use tokio::sync::mpsc; | ||
|
||
use crate::partitions::PartitionRouting; | ||
use restate_types::identifiers::PartitionId; | ||
use restate_types::{GenerationalNodeId, NodeId, Version}; | ||
|
||
pub fn fixed_single_node( | ||
node_id: GenerationalNodeId, | ||
partition_id: PartitionId, | ||
) -> PartitionRouting { | ||
let (sender, _) = mpsc::channel(1); | ||
|
||
let mut mappings = HashMap::default(); | ||
mappings.insert(partition_id, NodeId::Generational(node_id)); | ||
|
||
PartitionRouting { | ||
sender, | ||
partition_to_node_mappings: Arc::new(ArcSwap::new(Arc::new( | ||
super::PartitionToNodesRoutingTable { | ||
version: Version::MIN, | ||
inner: mappings, | ||
}, | ||
))), | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is more intuitive and more general for the future.