diff --git a/pkg/service/node_static_inline.go b/pkg/service/node_static_inline.go index cc9add1..4e4f9dc 100644 --- a/pkg/service/node_static_inline.go +++ b/pkg/service/node_static_inline.go @@ -25,8 +25,10 @@ func (s *Service) nodePublishVolumeStaticInlineVolume(ctx context.Context, volum duration := time.Since(startedAt) logger.WithContext(ctx).Infof("pulled model: %s %s", reference, duration) + mountCtx, mountCancel := context.WithTimeout(context.WithoutCancel(ctx), 30*time.Second) + defer mountCancel() if err := mounter.Mount( - ctx, + mountCtx, mounter.NewBuilder(). Bind(). From(modelDir). diff --git a/pkg/service/worker.go b/pkg/service/worker.go index 8628736..98dd49f 100644 --- a/pkg/service/worker.go +++ b/pkg/service/worker.go @@ -165,8 +165,10 @@ func (worker *Worker) pullModel(ctx context.Context, statusPath, volumeName, mou } defer worker.kmutex.Unlock(contextKey) + // Decouple from the kubelet RPC deadline so that large pulls aren't killed + // when kubelet times out and retries var cancel context.CancelFunc - ctx, cancel = context.WithCancel(ctx) + ctx, cancel = context.WithCancel(context.WithoutCancel(ctx)) worker.contextMap.Set(contextKey, &cancel) defer worker.contextMap.Set(contextKey, nil)