diff --git a/CHANGELOG.md b/CHANGELOG.md
index 982b7c6df..83dd65d53 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Fixed
+- **The whole app restarted in a host crash-loop whenever a download-monitor cycle hit a transient SQLite "database is locked".** `DownloadMonitorService`'s poll loop caught only `OperationCanceledException`, so any other exception thrown by a cycle — in practice a `Microsoft.Data.Sqlite.SqliteException` ("database is locked") raised while persisting a download via `EfDownloadRepository.UpdateAsync`/`GetByIdAsync` — propagated out of `ExecuteAsync`. An unhandled exception out of a `BackgroundService` trips the .NET host's default `BackgroundServiceExceptionBehavior.StopHost`, so the host shut down gracefully (exit 0) and a container restart policy brought it back — repeating on roughly every poll cycle (observed ~18 restarts in 9h). The other background services (`QueueMonitorService`, `MovedDownloadProcessor`, `AutomaticSearchService`) already log-and-continue on a failed cycle; `DownloadMonitorService` was the lone exception. The poll loop (extracted to `RunMonitorLoopAsync` for isolation/testability) now logs and continues on any non-cancellation exception other than `OutOfMemoryException`/`StackOverflowException` (logging "Error in Download Monitor Service"), and logs a warning and continues when a cycle is canceled or times out while the service's own token has not been cancelled, so a transient lock no longer takes down the app. Covered by a unit test. (The underlying SQLite write contention — the connection string sets no `busy_timeout` — is a separate hardening follow-up; this stops a single transient error from being fatal.)
- **Authentication settings: startup-config save no longer offers a downloadable `config.json` fallback when the backend refuses the save as invalid.** `SettingsView.saveSettings()` previously wrapped `apiService.saveStartupConfig` in a bare `catch {}` and treated every failure as a disk-persistence problem — offering the user a downloadable `config.json` containing the *server-rejected* values so they could save it manually. That bypasses the new backend admin-existence guard entirely: a user who tries to enable the login screen with no admin user gets the backend's 400, the FE catches it, and the FE offers a download of the same `AuthenticationRequired=true` config the server just refused. The catch now inspects the thrown error's `status`: 4xx responses are validation refusals and surface as a hard error toast (no download offered); 5xx and network failures fall through to the existing download fallback, which is the right escape hatch for "server wants to save but can't write to disk."
- **Authentication settings: enabling the login screen now refuses to persist when no admin user exists.** `ConfigurationService.SaveStartupConfigAsync` queries `IUserService.GetAdminUsersAsync` whenever the incoming save *transitions* `AuthenticationRequired` from disabled to enabled, and throws if the admin user list is empty. This closes the carveout left by the credential-visibility and admin-provisioning fixes below: the settings DTO clears blank fields before save, so a user who flips "Enable login screen" with empty (or username-only) admin credentials silently skipped provisioning entirely and still reached the startup-config write, locking themselves out of an admin-less instance (recoverable by editing `config/config.json` back to `"AuthenticationRequired": "false"`, but a confusing first-time-setup trap). The check is scoped to the transition: subsequent saves while auth is already on (API key regenerations, port changes, log-level tweaks) don't re-query the admin list, and the common "just updating other startup fields with auth off" path stays unaffected. The admin block in `SaveApplicationSettings` runs before the startup-config write in the same save flow, so the typical "supply credentials and enable login in the same save" sequence has the admin row in place by the time the check runs.
- **Authentication settings: admin provisioning failures no longer silently let the auth-required toggle proceed.** `ConfigurationService.SaveApplicationSettingsAsync` previously caught any exception from `CreateUserAsync` / `UpdatePasswordAsync`, logged it, and returned successfully — so when admin credentials were supplied but the user-service rejected them (password policy violation, repo I/O error, concurrent-write race), `SettingsView.saveSettings()` would still go on to persist `AuthenticationRequired=true` on its second request. The result was an instance that required login but had no working admin account — exactly the lockout shape the credential-visibility fix below was meant to prevent. The catch now re-throws the failure so the caller aborts before the auth-toggle write. The settings row itself is still saved before the admin block (non-admin changes like notification triggers and webhooks shouldn't disappear because admin provisioning failed), and the no-credentials path remains an unchanged silent skip.
diff --git a/listenarr.application/Downloads/DownloadMonitorService.cs b/listenarr.application/Downloads/DownloadMonitorService.cs
index 2bd485210..ba2bcedec 100644
--- a/listenarr.application/Downloads/DownloadMonitorService.cs
+++ b/listenarr.application/Downloads/DownloadMonitorService.cs
@@ -92,21 +92,53 @@ protected override async Task ExecuteAsync(CancellationToken cancellationToken)
}
logger.LogInformation($"DownloadMonitorService polling interval set to {_pollingInterval}s");
+ await RunMonitorLoopAsync(cancellationToken);
+
+ logger.LogInformation("Download Monitor Service stopping");
+ }
+
+ /// <summary>
+ /// The poll loop. Each cycle is isolated: a non-cancellation failure (e.g. a transient
+ /// SQLite "database is locked") is logged and the loop continues, matching the sibling
+ /// background services (QueueMonitorService, MovedDownloadProcessor, AutomaticSearchService).
+ /// Ordinary exceptions must not escape - an unhandled exception out of a BackgroundService's
+ /// ExecuteAsync trips BackgroundServiceExceptionBehavior.StopHost and restarts the whole app
+ /// (previously this loop caught only OperationCanceledException, so a transient DB lock
+ /// crash-looped the container every poll cycle). Returns once the token is cancelled.
+ /// </summary>
+ internal async Task RunMonitorLoopAsync(CancellationToken cancellationToken)
+ {
while (!cancellationToken.IsCancellationRequested)
{
try
{
await MonitorDownloadsAsync(cancellationToken);
+ }
+ catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
+ {
+ // Our own token was cancelled (the service is stopping) - exit gracefully
+ break;
+ }
+ catch (OperationCanceledException ex)
+ {
+ // Cancellation that did not come from our token (e.g. an inner timeout) - log and keep monitoring
+ logger.LogWarning(ex, "Download monitor cycle canceled/timed out; continuing");
+ }
+ catch (Exception ex) when (ex is not OperationCanceledException && ex is not OutOfMemoryException && ex is not StackOverflowException) // OOM / stack overflow are excluded by this filter and propagate to the caller
+ {
+ logger.LogError(ex, "Error in Download Monitor Service");
+ }
+ // Wait before next poll; cancellation during the wait ends the loop
+ try
+ {
await Task.Delay(TimeSpan.FromSeconds(_pollingInterval), cancellationToken);
}
catch (OperationCanceledException)
{
- // Those exceptions are expected, service should stop gracefully
+ break;
}
}
-
- logger.LogInformation("Download Monitor Service stopping");
}
internal async Task MonitorDownloadsAsync(CancellationToken cancellationToken)
diff --git a/tests/Features/Application/Downloads/DownloadMonitorServiceResilienceTests.cs b/tests/Features/Application/Downloads/DownloadMonitorServiceResilienceTests.cs
new file mode 100644
index 000000000..984534249
--- /dev/null
+++ b/tests/Features/Application/Downloads/DownloadMonitorServiceResilienceTests.cs
@@ -0,0 +1,81 @@
+using System.Diagnostics;
+using Listenarr.Application.Downloads;
+using Listenarr.Application.Interfaces;
+using Listenarr.Application.Interfaces.Repositories;
+using Listenarr.Domain.Models.Configurations;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Logging.Abstractions;
+using Moq;
+using Xunit;
+
+namespace Listenarr.Tests.Features.Application.Downloads
+{
+ [Trait("Name", "DownloadMonitorServiceResilienceTests")]
+ [Trait("Category", "DownloadMonitorService")]
+ public class DownloadMonitorServiceResilienceTests
+ {
+ // Regression guard for the host restart loop: a transient failure inside a monitor cycle
+ // (e.g. a SQLite "database is locked" exception thrown while persisting a download) must be
+ // logged-and-swallowed so the poll loop keeps running. The loop previously caught only
+ // OperationCanceledException, so any other exception propagated out of ExecuteAsync and
+ // tripped BackgroundServiceExceptionBehavior.StopHost, gracefully restarting the whole app
+ // on every poll cycle.
+ //
+ // Drives the extracted loop (RunMonitorLoopAsync) directly to avoid the service's fixed 5s
+ // startup delay, keeping the test fast and free of timing pressure on the parallel suite.
+ [Fact]
+ [Trait("Method", "RunMonitorLoopAsync")]
+ public async Task RunMonitorLoopAsync_CycleThrows_KeepsLoopingInsteadOfStoppingHost()
+ {
+ int cycleAttempts = 0;
+
+ var config = new Mock<IConfigurationService>();
+ // Every cycle fails the way the live host did: a non-cancellation exception raised while
+ // the cycle is talking to the database.
+ config.Setup(c => c.GetDownloadClientConfigurationsAsync())
+ .Returns(() =>
+ {
+ Interlocked.Increment(ref cycleAttempts);
+ throw new InvalidOperationException("simulated transient SQLite 'database is locked'");
+ });
+ config.Setup(c => c.GetApplicationSettingsAsync())
+ .ReturnsAsync(new ApplicationSettings());
+
+ var provider = new Mock<IServiceProvider>();
+ provider.Setup(p => p.GetService(typeof(IConfigurationService))).Returns(config.Object);
+ provider.Setup(p => p.GetService(typeof(IDownloadRepository))).Returns(Mock.Of<IDownloadRepository>());
+ provider.Setup(p => p.GetService(typeof(IDownloadClientGateway))).Returns(Mock.Of<IDownloadClientGateway>());
+
+ var scope = new Mock<IServiceScope>();
+ scope.SetupGet(s => s.ServiceProvider).Returns(provider.Object);
+
+ var scopeFactory = new Mock<IServiceScopeFactory>();
+ scopeFactory.Setup(f => f.CreateScope()).Returns(scope.Object);
+
+ var service = new DownloadMonitorService(
+ scopeFactory.Object,
+ Mock.Of(), // NOTE(review): generic type argument is missing; the constructor's second parameter type is not visible in this diff - restore it from DownloadMonitorService's constructor
+ NullLogger.Instance); // NOTE(review): presumably NullLogger<DownloadMonitorService>.Instance - confirm against the constructor's logger parameter
+
+ using var cts = new CancellationTokenSource();
+ var loopTask = service.RunMonitorLoopAsync(cts.Token);
+
+ // Wait for the first cycle to be attempted (and thrown + swallowed). With the default
+ // polling interval the loop then parks in its inter-cycle delay rather than completing.
+ var sw = Stopwatch.StartNew();
+ while (Volatile.Read(ref cycleAttempts) < 1 && sw.Elapsed < TimeSpan.FromSeconds(5))
+ {
+ await Task.Delay(20);
+ }
+
+ Assert.True(cycleAttempts >= 1, "The monitor loop never executed a cycle.");
+ // If the cycle's exception had propagated (the bug), the loop task would be faulted/
+ // completed by now. It must still be running.
+ Assert.False(loopTask.IsCompleted,
+ "RunMonitorLoopAsync stopped after a cycle threw - the exception propagated instead of being swallowed.");
+
+ cts.Cancel();
+ await loopTask; // must complete cleanly (no exception escapes the loop)
+ }
+ }
+}