diff --git a/internal/pkg/archiver/general/body.go b/internal/pkg/archiver/general/body.go index e22372fa..58c2212d 100644 --- a/internal/pkg/archiver/general/body.go +++ b/internal/pkg/archiver/general/body.go @@ -47,6 +47,7 @@ func processBody(u *models.URL, disableAssetsCapture, domainsCrawl bool, maxHops if err := connutil.CopyWithTimeout(io.Discard, u.GetResponse().Body); err != nil { return err } + return nil } // Get a buffer from the pool for MIME type detection diff --git a/internal/pkg/archiver/headless/archiver.go b/internal/pkg/archiver/headless/archiver.go index b982f56b..9aba5fdd 100644 --- a/internal/pkg/archiver/headless/archiver.go +++ b/internal/pkg/archiver/headless/archiver.go @@ -327,11 +327,16 @@ func archivePage(warcClient *warc.CustomHTTPClient, item *models.Item, seed *mod // Navigate to the URL logger.Debug("navigating to URL") - go router.Run() + ready := make(chan struct{}) + + go func() { + defer close(ready) // router is now running and handlers are active + router.Run() + }() // Wait for the router to start to avoid race condition in rod // The race happens between router.Run() initializing events and page.Navigate() triggering events. - time.Sleep(100 * time.Millisecond) + <-ready err = page.Navigate(item.GetURL().String()) if err != nil { diff --git a/internal/pkg/archiver/ratelimiter/ratelimiter.go b/internal/pkg/archiver/ratelimiter/ratelimiter.go index 305f4877..c27623b2 100644 --- a/internal/pkg/archiver/ratelimiter/ratelimiter.go +++ b/internal/pkg/archiver/ratelimiter/ratelimiter.go @@ -59,8 +59,14 @@ func (tb *tokenBucket) Wait() { tb.mu.Unlock() return } + + // Calculate exact time until next token is available + // instead of busy-waiting with an arbitrary sleep duration. + tokensNeeded := 1.0 - tb.tokens + waitDuration := time.Duration(tokensNeeded / tb.refillRate * float64(time.Second)) tb.mu.Unlock() - time.Sleep(50 * time.Millisecond) // adjust as needed + + time.Sleep(waitDuration) } } diff --git a/pkg/models/item_dedupe.go b/pkg/models/item_dedupe.go index f700cf6e..38463674 100644 --- a/pkg/models/item_dedupe.go +++ b/pkg/models/item_dedupe.go @@ -38,10 +38,10 @@ func flattenTree(root *Item) []*Item { var nodes []*Item var traverse func(node *Item) traverse = func(node *Item) { - nodes = append(nodes, node) if node == nil { return } + nodes = append(nodes, node) for _, child := range node.GetChildren() { traverse(child) }