-
Notifications
You must be signed in to change notification settings - Fork 623
Fetch updated attested nodes in bulk #7022
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 3 commits
3a57982
be33699
3c0ff15
e1fc8d3
e1accc5
1e7fed2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,6 +4,8 @@ import ( | |
| "context" | ||
| "errors" | ||
| "fmt" | ||
| "maps" | ||
| "slices" | ||
| "time" | ||
|
|
||
| "github.com/andres-erbsen/clock" | ||
|
|
@@ -35,6 +37,7 @@ type attestedNodes struct { | |
|
|
||
| eventTracker *eventTracker | ||
| eventTimeout time.Duration | ||
| pageSize int32 | ||
|
|
||
| fetchNodes map[string]struct{} | ||
|
|
||
|
|
@@ -158,7 +161,11 @@ func (a *attestedNodes) loadCache(ctx context.Context) error { | |
|
|
||
| // buildAttestedNodesCache fetches all attested nodes and adds the unexpired ones to the cache. | ||
| // It runs once at startup. | ||
| func buildAttestedNodesCache(ctx context.Context, log logrus.FieldLogger, metrics telemetry.Metrics, ds datastore.DataStore, clk clock.Clock, cache *authorizedentries.Cache, nodeCache *nodecache.Cache, cacheReloadInterval, eventTimeout time.Duration) (*attestedNodes, error) { | ||
| func buildAttestedNodesCache(ctx context.Context, log logrus.FieldLogger, metrics telemetry.Metrics, ds datastore.DataStore, clk clock.Clock, cache *authorizedentries.Cache, nodeCache *nodecache.Cache, pageSize int32, cacheReloadInterval, eventTimeout time.Duration) (*attestedNodes, error) { | ||
| if pageSize <= 0 { | ||
| return nil, fmt.Errorf("page size must be positive, got %d", pageSize) | ||
| } | ||
|
|
||
| pollPeriods := PollPeriods(cacheReloadInterval, eventTimeout) | ||
|
|
||
| attestedNodes := &attestedNodes{ | ||
|
|
@@ -169,6 +176,7 @@ func buildAttestedNodesCache(ctx context.Context, log logrus.FieldLogger, metric | |
| log: log, | ||
| metrics: metrics, | ||
| eventTimeout: eventTimeout, | ||
| pageSize: pageSize, | ||
|
|
||
| eventsBeforeFirst: make(map[uint]struct{}), | ||
| fetchNodes: make(map[string]struct{}), | ||
|
|
@@ -211,34 +219,39 @@ func (a *attestedNodes) updateCache(ctx context.Context) error { | |
| } | ||
|
|
||
| func (a *attestedNodes) updateCachedNodes(ctx context.Context) error { | ||
| for spiffeId := range a.fetchNodes { | ||
| node, err := a.ds.FetchAttestedNode(ctx, spiffeId) | ||
| spiffeIds := slices.Collect(maps.Keys(a.fetchNodes)) | ||
| for pageStart := 0; pageStart < len(spiffeIds); pageStart += int(a.pageSize) { | ||
| fetchNodes := a.fetchNodesPage(spiffeIds, pageStart) | ||
| nodes, err := a.ds.FetchAttestedNodes(ctx, fetchNodes) | ||
| if err != nil { | ||
| continue | ||
| return err | ||
| } | ||
|
Comment on lines
+222
to
228
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is intentional, to stay consistent with the registration entries cache path. There's no data loss or extended-stale risk: IDs are only removed from `a.fetchNodes` after they have been handled, so IDs from a failed page stay queued. I'd prefer to keep the two cache paths symmetric, but I'm happy to switch to per-page skip if you'd rather change both here and in the entries path. |
||
|
|
||
| // Node was deleted | ||
| if node == nil { | ||
| a.nodeCache.RemoveAttestedNode(spiffeId) | ||
| a.cache.RemoveAgent(spiffeId) | ||
| delete(a.fetchNodes, spiffeId) | ||
| continue | ||
| } | ||
| for _, spiffeId := range fetchNodes { | ||
| node, ok := nodes[spiffeId] | ||
| // Node was deleted | ||
| if !ok { | ||
| a.nodeCache.RemoveAttestedNode(spiffeId) | ||
| a.cache.RemoveAgent(spiffeId) | ||
| delete(a.fetchNodes, spiffeId) | ||
| continue | ||
| } | ||
|
|
||
| selectors, err := a.ds.GetNodeSelectors(ctx, spiffeId, datastore.RequireCurrent) | ||
| if err != nil { | ||
| continue | ||
| agentExpiresAt := time.Unix(node.CertNotAfter, 0) | ||
| a.cache.UpdateAgent(node.SpiffeId, agentExpiresAt, api.ProtoFromSelectors(node.Selectors)) | ||
| a.nodeCache.UpdateAttestedNode(node) | ||
|
nweisenauer-sap marked this conversation as resolved.
|
||
| delete(a.fetchNodes, spiffeId) | ||
| } | ||
| node.Selectors = selectors | ||
|
|
||
| agentExpiresAt := time.Unix(node.CertNotAfter, 0) | ||
| a.cache.UpdateAgent(node.SpiffeId, agentExpiresAt, api.ProtoFromSelectors(node.Selectors)) | ||
| a.nodeCache.UpdateAttestedNode(node) | ||
| delete(a.fetchNodes, spiffeId) | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| // fetchNodesPage returns the sub-slice of spiffeIds for the page starting at pageStart; it holds at most a.pageSize IDs and is clamped to the end of spiffeIds. | ||
| func (a *attestedNodes) fetchNodesPage(spiffeIds []string, pageStart int) []string { | ||
| pageEnd := min(len(spiffeIds), pageStart+int(a.pageSize)) | ||
| return spiffeIds[pageStart:pageEnd] | ||
| } | ||
|
|
||
| func (a *attestedNodes) emitMetrics() { | ||
| if a.skippedNodeEvents != a.eventTracker.EventCount() { | ||
| a.skippedNodeEvents = a.eventTracker.EventCount() | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed this would improve observability. I've left it sharing
`StartFetchNodeCall` for now to match the existing convention — the bulk `FetchRegistrationEntries` similarly reuses `StartFetchRegistrationCall`. To keep telemetry consistent, I'd suggest introducing dedicated batch metrics for both bulk methods (plus the corresponding
`telemetry_config.md` entries) in a separate follow-up rather than diverging here. Let me know if you'd prefer it in this PR.