Skip to content

Commit

Permalink
Relaunch [RPC Gateway Fallback Redesign] Implement the online path (#629)
Browse files Browse the repository at this point in the history
This is a re-launch of Uniswap/routing-api#601.

The original PR caused the total size of the Lambda's environment variables to exceed 4KB, so it could not be deployed. Since then we've removed a few environment variables from the beta and prod RoutingLambda and cleaned up some of the secrets' values that end up in the environment variables.

There are no code changes compared with the [original](Uniswap/routing-api#601) PR, which was merged but then [reverted](Uniswap/routing-api#627).
  • Loading branch information
treeMan0301 committed Apr 24, 2024
1 parent 8c907d9 commit 744d842
Show file tree
Hide file tree
Showing 16 changed files with 231 additions and 1,170 deletions.
3 changes: 1 addition & 2 deletions bin/stacks/routing-api-stack.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ export class RoutingAPIStack extends cdk.Stack {
cachedV3PoolsDynamoDb,
cachedV2PairsDynamoDb,
tokenPropertiesCachingDynamoDb,
rpcProviderStateDynamoDb,
rpcProviderHealthStateDynamoDb,
} = new RoutingDatabaseStack(this, 'RoutingDatabaseStack', {})

Expand All @@ -115,7 +114,7 @@ export class RoutingAPIStack extends cdk.Stack {
cachedV3PoolsDynamoDb,
cachedV2PairsDynamoDb,
tokenPropertiesCachingDynamoDb,
rpcProviderStateDynamoDb,
rpcProviderHealthStateDynamoDb,
unicornSecret,
})

Expand Down
6 changes: 3 additions & 3 deletions bin/stacks/routing-database-stack.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ export const DynamoDBTableProps = {
Name: 'TokenPropertiesCachingDb',
PartitionKeyName: 'chainIdTokenAddress',
},
// NOTICE: This table is obsolete. Do not touch it.
RpcProviderStateDbTable: {
Name: 'RpcProviderState',
PartitionKeyName: 'chainIdProviderName',
Expand All @@ -64,7 +65,6 @@ export class RoutingDatabaseStack extends cdk.NestedStack {
public readonly cachedV3PoolsDynamoDb: aws_dynamodb.Table
public readonly cachedV2PairsDynamoDb: aws_dynamodb.Table
public readonly tokenPropertiesCachingDynamoDb: aws_dynamodb.Table
public readonly rpcProviderStateDynamoDb: aws_dynamodb.Table
public readonly rpcProviderHealthStateDynamoDb: aws_dynamodb.Table

constructor(scope: Construct, name: string, props: RoutingDatabaseStackProps) {
Expand Down Expand Up @@ -153,8 +153,8 @@ export class RoutingDatabaseStack extends cdk.NestedStack {
}
)

// NOTICE: This table will become useless after we fully migrate to rpcProviderHealthStateDynamoDb
this.rpcProviderStateDynamoDb = new aws_dynamodb.Table(this, DynamoDBTableProps.RpcProviderStateDbTable.Name, {
// NOTICE: This table has become useless now that we have fully migrated to rpcProviderHealthStateDynamoDb
new aws_dynamodb.Table(this, DynamoDBTableProps.RpcProviderStateDbTable.Name, {
tableName: DynamoDBTableProps.RpcProviderStateDbTable.Name,
partitionKey: {
name: DynamoDBTableProps.RpcProviderStateDbTable.PartitionKeyName,
Expand Down
8 changes: 4 additions & 4 deletions bin/stacks/routing-lambda-stack.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ export interface RoutingLambdaStackProps extends cdk.NestedStackProps {
cachedV3PoolsDynamoDb: aws_dynamodb.Table
cachedV2PairsDynamoDb: aws_dynamodb.Table
tokenPropertiesCachingDynamoDb: aws_dynamodb.Table
rpcProviderStateDynamoDb: aws_dynamodb.Table
rpcProviderHealthStateDynamoDb: aws_dynamodb.Table
unicornSecret: string
}
export class RoutingLambdaStack extends cdk.NestedStack {
Expand Down Expand Up @@ -61,7 +61,7 @@ export class RoutingLambdaStack extends cdk.NestedStack {
cachedV3PoolsDynamoDb,
cachedV2PairsDynamoDb,
tokenPropertiesCachingDynamoDb,
rpcProviderStateDynamoDb,
rpcProviderHealthStateDynamoDb,
unicornSecret,
} = props

Expand All @@ -88,7 +88,7 @@ export class RoutingLambdaStack extends cdk.NestedStack {
cachedV3PoolsDynamoDb.grantReadWriteData(lambdaRole)
cachedV2PairsDynamoDb.grantReadWriteData(lambdaRole)
tokenPropertiesCachingDynamoDb.grantReadWriteData(lambdaRole)
rpcProviderStateDynamoDb.grantReadWriteData(lambdaRole)
rpcProviderHealthStateDynamoDb.grantReadWriteData(lambdaRole)

const region = cdk.Stack.of(this).region

Expand Down Expand Up @@ -134,7 +134,7 @@ export class RoutingLambdaStack extends cdk.NestedStack {
CACHING_REQUEST_FLAG_TABLE_NAME: DynamoDBTableProps.CachingRequestFlagDynamoDbTable.Name,
CACHED_V3_POOLS_TABLE_NAME: DynamoDBTableProps.V3PoolsDynamoDbTable.Name,
V2_PAIRS_CACHE_TABLE_NAME: DynamoDBTableProps.V2PairsDynamoCache.Name,
RPC_PROVIDER_HEALTH_TABLE_NAME: DynamoDBTableProps.RpcProviderStateDbTable.Name,
RPC_PROVIDER_HEALTH_TABLE_NAME: DynamoDBTableProps.RpcProviderHealthStateDbTable.Name,

// tokenPropertiesCachingDynamoDb.tableName is the correct format.
// we will start using the correct ones going forward
Expand Down
87 changes: 28 additions & 59 deletions bin/stacks/rpc-gateway-dashboard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,45 +38,55 @@ function getSelectMetricsForChain(chainId: ChainId) {
return metrics
}

function getHealthScoreMetricsForChain(chainId: ChainId) {
function getProviderDbHealthStateChangeForChain(chainId: ChainId) {
const metrics = []
for (const providerName of getProviderNameForChain(chainId)) {
const providerId = getProviderId(chainId, providerName)
metrics.push([
'Uniswap',
`RPC_GATEWAY_FALLBACK_${providerId}_INTO_UNHEALTHY`,
'Service',
'RoutingAPI',
{
id: `db_into_unhealthy_${chainId}_${providerName}`,
label: `${providerName} DB into UNHEALTHY ${ID_TO_NETWORK_NAME(chainId)}`,
},
])
metrics.push([
'Uniswap',
`RPC_GATEWAY_${chainId}_${providerName}_health_score`,
`RPC_GATEWAY_FALLBACK_${providerId}_INTO_HEALTHY`,
'Service',
'RoutingAPI',
{
id: `health_score_${chainId}_${providerName}`,
label: `${providerName} health score on ${ID_TO_NETWORK_NAME(chainId)}`,
id: `db_into_healthy_${chainId}_${providerName}`,
label: `${providerName} DB into HEALTHY ${ID_TO_NETWORK_NAME(chainId)}`,
},
])
}
return metrics
}

function getProviderDbHealthStateChangeForChain(chainId: ChainId) {
function getProviderHealthStateChangeForChain(chainId: ChainId) {
const metrics = []
for (const providerName of getProviderNameForChain(chainId)) {
const providerId = getProviderId(chainId, providerName)
metrics.push([
'Uniswap',
`RPC_GATEWAY_FALLBACK_${providerId}_INTO_UNHEALTHY`,
`RPC_GATEWAY_${chainId}_${providerName}_becomes_UNHEALTHY`,
'Service',
'RoutingAPI',
{
id: `db_into_unhealthy_${chainId}_${providerName}`,
label: `${providerName} DB into UNHEALTHY ${ID_TO_NETWORK_NAME(chainId)}`,
id: `provider_into_unhealthy_${chainId}_${providerName}`,
label: `${providerName} into UNHEALTHY ${ID_TO_NETWORK_NAME(chainId)}`,
},
])
metrics.push([
'Uniswap',
`RPC_GATEWAY_FALLBACK_${providerId}_INTO_HEALTHY`,
`RPC_GATEWAY_${chainId}_${providerName}_becomes_HEALTHY`,
'Service',
'RoutingAPI',
{
id: `db_into_healthy_${chainId}_${providerName}`,
label: `${providerName} DB into HEALTHY ${ID_TO_NETWORK_NAME(chainId)}`,
id: `provider_into_healthy_${chainId}_${providerName}`,
label: `${providerName} into HEALTHY ${ID_TO_NETWORK_NAME(chainId)}`,
},
])
}
Expand Down Expand Up @@ -122,26 +132,6 @@ function getSuccessMetricsForChain(chainId: ChainId) {
return metrics
}

function getHighLatencyMetricsForChain(chainId: ChainId) {
const metrics = []
const methodNames = ['call', 'send', 'getGasPrice', 'getBlockNumber']
for (const providerName of getProviderNameForChain(chainId)) {
for (const methodName of methodNames) {
metrics.push([
'Uniswap',
`RPC_GATEWAY_${chainId}_${providerName}_${methodName}_SUCCESS_HIGH_LATENCY`,
'Service',
'RoutingAPI',
{
id: `${methodName}_high_latency_${chainId}_${providerName}`,
label: `${providerName} ${methodName} high latency on ${ID_TO_NETWORK_NAME(chainId)}`,
},
])
}
}
return metrics
}

function getFailedMetricsForChain(chainId: ChainId) {
const metrics = []
const methodNames = ['call', 'send', 'getGasPrice', 'getBlockNumber']
Expand Down Expand Up @@ -607,18 +597,18 @@ export class RpcGatewayDashboardStack extends cdk.NestedStack {
width: 24,
type: 'metric',
properties: {
metrics: getHealthScoreMetricsForChain(chainId),
metrics: getProviderDbHealthStateChangeForChain(chainId),
view: 'timeSeries',
stacked: false,
region,
stat: 'Maximum',
period: 300,
title: `Provider (negative) health score for ${ID_TO_NETWORK_NAME(chainId)}`,
title: `Provider DB health change for ${ID_TO_NETWORK_NAME(chainId)}`,
setPeriodToTimeRange: true,
yAxis: {
left: {
showUnits: false,
label: 'Score (in negative)',
label: 'DB health state changes',
},
},
},
Expand All @@ -628,18 +618,18 @@ export class RpcGatewayDashboardStack extends cdk.NestedStack {
width: 24,
type: 'metric',
properties: {
metrics: getProviderDbHealthStateChangeForChain(chainId),
metrics: getProviderHealthStateChangeForChain(chainId),
view: 'timeSeries',
stacked: false,
region,
stat: 'Maximum',
period: 300,
title: `Provider DB health change for ${ID_TO_NETWORK_NAME(chainId)}`,
title: `Provider health state for ${ID_TO_NETWORK_NAME(chainId)}`,
setPeriodToTimeRange: true,
yAxis: {
left: {
showUnits: false,
label: 'DB health state changes',
label: 'Health state changes',
},
},
},
Expand Down Expand Up @@ -707,27 +697,6 @@ export class RpcGatewayDashboardStack extends cdk.NestedStack {
},
},
},
{
height: 8,
width: 24,
type: 'metric',
properties: {
metrics: getHighLatencyMetricsForChain(chainId),
view: 'timeSeries',
stacked: false,
region,
stat: 'Sum',
period: 300,
title: `Provider high latency occurrence for ${ID_TO_NETWORK_NAME(chainId)}`,
setPeriodToTimeRange: true,
yAxis: {
left: {
showUnits: false,
label: 'Requests',
},
},
},
},
{
height: 8,
width: 24,
Expand Down
4 changes: 2 additions & 2 deletions bin/stacks/rpc-gateway-fallback-stack.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ export class RpcGatewayFallbackStack extends cdk.NestedStack {
metric,
comparisonOperator: ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
// TODO(jie): Restore a reasonable threshold once we have verified the workflow in prod.
threshold: 0.1, // Alarm when error rate >= 0.1%
threshold: 1.0, // Alarm when error rate >= 1.0%
evaluationPeriods: 1,
})

Expand Down Expand Up @@ -127,7 +127,7 @@ export class RpcGatewayFallbackStack extends cdk.NestedStack {
metric,
comparisonOperator: ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
// TODO(jie): Restore a reasonable threshold once we have verified the workflow in prod.
threshold: 100, // Alarm when latency >= 100ms
threshold: 150, // Alarm when latency >= 150ms
evaluationPeriods: 1,
})

Expand Down
16 changes: 0 additions & 16 deletions lib/rpc/ProviderState.ts

This file was deleted.

104 changes: 0 additions & 104 deletions lib/rpc/ProviderStateDynamoDbRepository.ts

This file was deleted.

Loading

0 comments on commit 744d842

Please sign in to comment.