From 41dd0beea9251ce2a40d385190c3b623ce47e7c6 Mon Sep 17 00:00:00 2001 From: Jack Kleeman Date: Thu, 14 Nov 2024 09:07:29 -0700 Subject: [PATCH] Otel papercut fixes (#2291) 1. Remove unnecessary duplicate names eg 'sleep sleep' 2. Emit spans for Run entries 3. Clap env var RESTATE_SERVICES_RUNTIME_ENDPOINT -> RESTATE_TRACING_SERVICES_ENDPOINT --- crates/tracing-instrumentation/src/lib.rs | 18 +++++--- .../types/src/config/cli_option_overrides.rs | 2 +- .../worker/src/partition/state_machine/mod.rs | 42 +++++++++++++------ 3 files changed, 43 insertions(+), 19 deletions(-) diff --git a/crates/tracing-instrumentation/src/lib.rs b/crates/tracing-instrumentation/src/lib.rs index 71077c63e..ea85ad047 100644 --- a/crates/tracing-instrumentation/src/lib.rs +++ b/crates/tracing-instrumentation/src/lib.rs @@ -381,14 +381,13 @@ macro_rules! invocation_span { $crate::invocation_span!( level = $lvl, relation = $relation, - prefix = $prefix, name = format!("{} {}", $prefix, $target.short()), attributes=attributes, fields=($($field = $field_value),*) ) } }; - (level= $lvl:expr, relation = $relation:expr, prefix= $prefix:expr, id= $id:expr, name= $name:expr, tags=($($($key:ident).+ = $value:expr),*), fields=($($field:ident = $field_value:expr),*)) => { + (level= $lvl:expr, relation = $relation:expr, id= $id:expr, name= $name:expr, tags=($($($key:ident).+ = $value:expr),*), fields=($($field:ident = $field_value:expr),*)) => { { use opentelemetry::KeyValue; @@ -400,14 +399,13 @@ macro_rules! invocation_span { $crate::invocation_span!( level = $lvl, relation = $relation, - prefix = $prefix, - name = format!("{} {}", $prefix, $name), + name = $name, attributes=attributes, fields=($($field = $field_value),*) ) } }; - (level= $lvl:expr, relation=$relation:expr, prefix= $prefix:expr, name= $name:expr, attributes=$attributes:ident, fields=($($field:ident = $field_value:expr),*)) => { + (level= $lvl:expr, relation=$relation:expr, name= $name:expr, attributes=$attributes:ident, fields=($($field:ident = $field_value:expr),*)) => { { use opentelemetry::{KeyValue, Context, trace::{Tracer, Link, TracerProvider, TraceContextExt}}; use restate_types::invocation::SpanRelation; @@ -493,6 +491,16 @@ macro_rules! info_invocation_span { fields = () ) }; + (relation=$relation:expr, id= $id:expr, name= $name:expr, tags=($($($key:ident).+ = $value:expr),*)) => { + $crate::invocation_span!( + level = ::tracing::Level::INFO, + relation = $relation, + id = $id, + name = $name, + tags = ($($($key).+ = $value),*), + fields = () + ) + }; } #[cfg(test)] diff --git a/crates/types/src/config/cli_option_overrides.rs b/crates/types/src/config/cli_option_overrides.rs index 6a1209b99..ad995389f 100644 --- a/crates/types/src/config/cli_option_overrides.rs +++ b/crates/types/src/config/cli_option_overrides.rs @@ -123,7 +123,7 @@ pub struct CommonOptionCliOverride { /// through [opentelemetry_otlp](https://docs.rs/opentelemetry-otlp/0.12.0/opentelemetry_otlp/). /// /// To configure the sampling, please refer to the [opentelemetry autoconfigure docs](https://github.com/open-telemetry/opentelemetry-java/blob/main/sdk-extensions/autoconfigure/README.md#sampler). - #[clap(long, env = "RESTATE_SERVICES_RUNTIME_ENDPOINT", global = true)] + #[clap(long, env = "RESTATE_TRACING_SERVICES_ENDPOINT", global = true)] pub tracing_services_endpoint: Option, /// # Distributed Tracing JSON Export Path diff --git a/crates/worker/src/partition/state_machine/mod.rs b/crates/worker/src/partition/state_machine/mod.rs index 45af086f9..b178d8b71 100644 --- a/crates/worker/src/partition/state_machine/mod.rs +++ b/crates/worker/src/partition/state_machine/mod.rs @@ -79,6 +79,7 @@ use restate_types::time::MillisSinceEpoch; use restate_wal_protocol::timer::TimerKeyDisplay; use restate_wal_protocol::timer::TimerKeyValue; use restate_wal_protocol::Command; +use std::borrow::Cow; use std::collections::HashSet; use std::fmt; use std::fmt::{Debug, Formatter}; @@ -1793,7 +1794,6 @@ impl StateMachine { .journal_metadata .span_context .as_parent(), - prefix = "set-state", id = invocation_id, name = format!("set-state {key:?}"), tags = (rpc.service = invocation_metadata @@ -1824,7 +1824,6 @@ impl StateMachine { .journal_metadata .span_context .as_parent(), - prefix = "clear-state", id = invocation_id, name = "clear-state", tags = (rpc.service = invocation_metadata @@ -1850,7 +1849,6 @@ impl StateMachine { .journal_metadata .span_context .as_parent(), - prefix = "clear-all-state", id = invocation_id, name = "clear-all-state", tags = (rpc.service = invocation_metadata @@ -2106,7 +2104,6 @@ impl StateMachine { .journal_metadata .span_context .as_parent(), - prefix = "sleep", id = invocation_id, name = "sleep", tags = (rpc.service = invocation_metadata @@ -2284,7 +2281,26 @@ impl StateMachine { ) .await?; } - EnrichedEntryHeader::Run { .. } | EnrichedEntryHeader::Custom { .. } => { + EnrichedEntryHeader::Run { .. } => { + let _span = instrumentation::info_invocation_span!( + relation = invocation_metadata + .journal_metadata + .span_context + .as_parent(), + id = invocation_id, + name = match journal_entry.deserialize_name::()?.as_deref() { + None | Some("") => Cow::Borrowed("run"), + Some(name) => Cow::Owned(format!("run {}", name)), + }, + tags = (rpc.service = invocation_metadata + .invocation_target + .service_name() + .to_string()) + ); + + // We just store it + } + EnrichedEntryHeader::Custom { .. } => { // We just store it } EntryHeader::CancelInvocation => { @@ -2760,8 +2776,8 @@ impl StateMachine { #[tracing::instrument( skip_all, - level="info", - name="suspend", + level="info", + name="suspend", fields( metadata.journal.length = metadata.journal_metadata.length, restate.invocation.id = %invocation_id) @@ -2925,8 +2941,8 @@ impl StateMachine { #[tracing::instrument( skip_all, - level="info", - name="set_state", + level="info", + name="set_state", fields( restate.invocation.id = %invocation_id, restate.state.key = ?key, @@ -2951,8 +2967,8 @@ impl StateMachine { #[tracing::instrument( skip_all, - level="info", - name="clear_state", + level="info", + name="clear_state", fields( restate.invocation.id = %invocation_id, restate.state.key = ?key, @@ -2976,8 +2992,8 @@ impl StateMachine { #[tracing::instrument( skip_all, - level="info", - name="clear_all_state", + level="info", + name="clear_all_state", fields( restate.invocation.id = %invocation_id, rpc.service = %service_id.service_name