Skip to content

Commit 55208ed

Browse files
committed
perf(runtime-service): use short retry when no peers available
The runtime service tries to download the finalized block runtime immediately at startup, before peer connections are established. This always fails with StorageQueryError { errors: [] } (no peers to query). Previously, this triggered the full 4s retry_after_failed cooldown, making warm start consistently ~5-7s. Now, "no peers" errors use a 200ms retry instead of 4s. Peers typically connect within a few hundred milliseconds, so the retry succeeds quickly. Other errors (peer misbehavior, decode failures) still use the full 4s cooldown. Benchmark on Polkadot: warm start drops from ~5.5s to ~600ms.
1 parent 2cf734a commit 55208ed

3 files changed

Lines changed: 73 additions & 5 deletions

File tree

lib/src/chain/async_tree.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -550,6 +550,48 @@ where
550550
}
551551
}
552552

553+
/// Similar to [`AsyncTree::async_op_failure`], but the moment after which the operation may be
554+
/// retried is chosen by the caller instead of being derived from [`Config::retry_after_failed`].
///
/// Every non-finalized block whose asynchronous operation is currently in progress with the
/// given `async_op_id` is put back in the "pending" state. Blocks that are in any other state,
/// or in progress with a different id, are left untouched.
///
/// `retry_after` is a point in time (a `TNow`), not a duration. If a block already carries a
/// timeout, the earlier of that timeout and `retry_after` is kept.
555+
pub fn async_op_failure_with_timeout(&mut self, async_op_id: AsyncOpId, retry_after: &TNow) {
556+
// The indices are collected up front so that the tree can be mutated inside the loop, and
// are then visited in reverse of the ancestry order.
// NOTE(review): presumably ancestry order yields parents before children, so that reversing
// it lets `same_as_parent` below observe the parent before it is reset — TODO confirm.
for index in self
557+
.non_finalized_blocks
558+
.iter_ancestry_order()
559+
.map(|(index, _)| index)
560+
.collect::<Vec<_>>()
561+
.into_iter()
562+
.rev()
563+
{
564+
// Only blocks in progress with this exact operation id are affected; everything else is
// skipped through the `continue` arm.
let new_timeout = match self.non_finalized_blocks.get_mut(index).unwrap().async_op {
565+
AsyncOpState::InProgress {
566+
async_op_id: id,
567+
timeout: Some(ref timeout),
568+
} if id == async_op_id => Some(cmp::min(timeout.clone(), retry_after.clone())),
569+
AsyncOpState::InProgress {
570+
async_op_id: id,
571+
timeout: None,
572+
} if id == async_op_id => Some(retry_after.clone()),
573+
_ => continue,
574+
};
575+
576+
// `true` if the parent block is, at this point, in progress with the same operation id.
// A block without a parent yields `false`.
let same_as_parent = self
577+
.non_finalized_blocks
578+
.parent(index)
579+
.map_or(false, |idx| {
580+
match self.non_finalized_blocks.get(idx).unwrap().async_op {
581+
AsyncOpState::InProgress {
582+
async_op_id: id, ..
583+
} => id == async_op_id,
584+
_ => false,
585+
}
586+
});
587+
588+
// Switch the block back to "pending", remembering the computed retry moment.
self.non_finalized_blocks.get_mut(index).unwrap().async_op = AsyncOpState::Pending {
589+
same_as_parent,
590+
timeout: new_timeout,
591+
};
592+
}
593+
}
594+
553595
/// Examines the state of `self` and, if a block's asynchronous operation should be started,
554596
/// changes the state of the block to "in progress" and returns the parameters of the
555597
/// operation.

light-base/src/runtime_service.rs

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2810,12 +2810,26 @@ async fn run_background<TPlat: PlatformRef>(
28102810
);
28112811
}
28122812

2813-
match &mut background.tree {
2814-
Tree::FinalizedBlockRuntimeKnown { tree, .. } => {
2815-
tree.async_op_failure(async_op_id, &background.platform.now());
2813+
if error.is_no_peers() {
2814+
// No peers available yet — use a short retry (200ms) instead of
2815+
// the full 4s cooldown. Peers typically connect within a few hundred milliseconds.
2816+
let short_retry = background.platform.now() + Duration::from_millis(200);
2817+
match &mut background.tree {
2818+
Tree::FinalizedBlockRuntimeKnown { tree, .. } => {
2819+
tree.async_op_failure_with_timeout(async_op_id, &short_retry);
2820+
}
2821+
Tree::FinalizedBlockRuntimeUnknown { tree, .. } => {
2822+
tree.async_op_failure_with_timeout(async_op_id, &short_retry);
2823+
}
28162824
}
2817-
Tree::FinalizedBlockRuntimeUnknown { tree, .. } => {
2818-
tree.async_op_failure(async_op_id, &background.platform.now());
2825+
} else {
2826+
match &mut background.tree {
2827+
Tree::FinalizedBlockRuntimeKnown { tree, .. } => {
2828+
tree.async_op_failure(async_op_id, &background.platform.now());
2829+
}
2830+
Tree::FinalizedBlockRuntimeUnknown { tree, .. } => {
2831+
tree.async_op_failure(async_op_id, &background.platform.now());
2832+
}
28192833
}
28202834
}
28212835
}
@@ -2832,6 +2846,13 @@ enum RuntimeDownloadError {
28322846
}
28332847

28342848
impl RuntimeDownloadError {
2849+
/// Returns `true` if the download failed because no peers were available to query.
///
/// For a storage query failure the answer is delegated to the wrapped error's own
/// `is_no_peers`. An invalid header is never considered a "no peers" situation.
fn is_no_peers(&self) -> bool {
2850+
match self {
2851+
RuntimeDownloadError::StorageQuery(err) => err.is_no_peers(),
2852+
RuntimeDownloadError::InvalidHeader(_) => false,
2853+
}
2854+
}
2855+
28352856
/// Returns `true` if this is caused by networking issues, as opposed to a consensus-related
28362857
/// issue.
28372858
fn is_network_problem(&self) -> bool {

light-base/src/sync_service.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,6 +1040,11 @@ pub struct StorageQueryError {
10401040
}
10411041

10421042
impl StorageQueryError {
1043+
/// Returns `true` if no peers were available to query.
///
/// This is detected by the list of individual errors being empty.
/// NOTE(review): this assumes an empty `errors` list can only arise when no peer was queried
/// at all — confirm against the code that builds `StorageQueryError`.
1044+
pub fn is_no_peers(&self) -> bool {
1045+
self.errors.is_empty()
1046+
}
1047+
10431048
/// Returns `true` if this is caused by networking issues, as opposed to a consensus-related
10441049
/// issue.
10451050
pub fn is_network_problem(&self) -> bool {

0 commit comments

Comments
 (0)