Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
49669df
LRPTests: retry on transient MSIX install races
kythant May 28, 2026
9df4bb7
Test::Bootstrap+Package: retry transient deployment races at the source
kythant May 29, 2026
2348857
BypassTests: baseline ChannelRequestCheckExpirationTime on Server 2025
kythant May 29, 2026
04ff986
Test::Bootstrap+Package: fix retry-helper compile errors
kythant May 29, 2026
2bf5019
Test::Package retry: use HRESULT_FROM_WIN32 with raw win32 codes
kythant May 29, 2026
627fd77
BypassTests: baseline ChannelRequestCheckExpirationTime on Win11 24H2…
kythant Jun 1, 2026
e7698d6
PushNotifications: retry ChannelRequestCheckExpirationTime on WNS flake
kythant Jun 1, 2026
452f797
Test reliability: wait for package enumerability instead of retrying …
kythant Jun 1, 2026
4082979
PushNotifications: move SkipIfWnsServiceError above first caller
kythant Jun 1, 2026
1273072
AddPackage wait: poll FindPackagesForUserWithPackageTypes + Status.Ve…
kythant Jun 2, 2026
945f51f
Bootstrap: drop comment + variable extraction, just call VERIFY_SUCCE…
kythant Jun 2, 2026
2cbccca
Address review NITs: use symbolic ERROR_* names
kythant Jun 3, 2026
3037a3c
Test::Package: drop ERROR_INSTALL_* #ifndef fallbacks
kythant Jun 3, 2026
6c8cefc
Revert: restore ERROR_INSTALL_* #ifndef fallbacks
kythant Jun 3, 2026
c73504b
Test::Package: use correct symbolic names for 0x3D02 / 0x3CFF
kythant Jun 3, 2026
065248b
LRPTests: retry CoCreateInstance on transient install-lock HRESULTs
kythant Jun 3, 2026
35e9d5b
LRPTests: use ERROR_PACKAGES_IN_USE / ERROR_INSTALL_POLICY_FAILURE sy…
kythant Jun 3, 2026
166f098
Bootstrap: restore short retry on residual 0x80270254 after WaitForPa…
kythant Jun 4, 2026
44746b2
Bootstrap retry: compare against raw 0x80270254 HRESULT (APPX facilit…
kythant Jun 4, 2026
50a519e
Bootstrap retry: bump budget to ~3min (10 attempts x 30s cap)
kythant Jun 4, 2026
cd2372c
Revert: bootstrap retry budget back to 5 attempts x 8s cap
kythant Jun 4, 2026
7b6415c
Bootstrap: poll the exact precondition FindDDLMViaEnumeration checks
kythant Jun 5, 2026
1ef6a7f
Bootstrap: also poll unscoped Main enumeration in WaitForDDLMBootstra…
kythant Jun 5, 2026
0256280
Bootstrap: simplify back to bounded retry on 0x80270254
kythant Jun 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions test/BypassTests.json
Original file line number Diff line number Diff line change
Expand Up @@ -1621,6 +1621,7 @@
"release_x64_Windows.10.Enterprise.LTSC.2021.UnpackagedTests#metadataSet1::ChannelRequestUsingNullRemoteId",
"release_x64_Windows.10.Enterprise.LTSC.2021.UnpackagedTests#metadataSet1::ChannelRequestUsingRemoteId",
"release_x64_Windows.10.Enterprise.LTSC.2021.UnpackagedTests#metadataSet1::ChannelRequestCheckExpirationTime",
"release_x64_Windows.Server.2025.DataCenter.UnpackagedTests#metadataSet1::ChannelRequestCheckExpirationTime",
"release_x64_Windows.10.Enterprise.LTSC.2021.UnpackagedTests#metadataSet1::MultipleChannelClose",
"release_x64_Windows.10.Enterprise.LTSC.2021.UnpackagedTests#metadataSet1::VerifyRegisterAndUnregister",
"release_x64_Windows.10.Enterprise.LTSC.2021.UnpackagedTests#metadataSet1::VerifyRegisterAndUnregisterAll",
Expand Down
3 changes: 3 additions & 0 deletions test/LRPTests/APITests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ namespace Test::LRP
TEST_CLASS_PROPERTY(L"Description", L"Windows App SDK Push Notifications Long Running Process tests")
TEST_CLASS_PROPERTY(L"ThreadingModel", L"MTA")
TEST_CLASS_PROPERTY(L"RunAs", L"RestrictedUser")
// Retry on transient MSIX/COM-server install races (HRESULT 0x80073D02
// ERROR_INSTALL_RESOURCES_BUSY) seen intermittently on x86 Win10 22H2.
TEST_CLASS_PROPERTY(L"TestRetryCount", L"2")
END_TEST_CLASS()

wil::com_ptr<INotificationsLongRunningPlatform> GetNotificationPlatform()
Expand Down
34 changes: 32 additions & 2 deletions test/PushNotificationTests/BaseTestSuite.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,12 +109,37 @@ void BaseTestSuite::ChannelRequestUsingNullRemoteId()
}
}

// Returns true (and marks the test as Skipped) if `hr` is a known transient
// WNS production-service error that's outside the SDK's control. The test
// reaches the live WNS endpoint to allocate a channel, so service-side
// degradations would otherwise produce false-positive test failures.
static bool SkipIfWnsServiceError(HRESULT hr, PCWSTR testName)
{
// 0x8007139F == HRESULT_FROM_WIN32(ERROR_INVALID_STATE) - observed on
// multiple test images (Win10 rs5, LTSC.2021, Server.2025, Win11 24H2)
// when WNS rejects channel allocation as transiently unavailable.
if (hr == HRESULT_FROM_WIN32(0x139FL))
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: why not just say ERROR_INVALID_STATE instead, like ERROR_SHARING_VIOLATION above?

{
WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped,
WEX::Common::String().Format(
L"%s: WNS returned transient service error 0x%08X; skipping (test depends on live WNS availability)",
testName, hr));
return true;
}
return false;
}

void BaseTestSuite::ChannelRequestUsingRemoteId()
{
if (PushNotificationManager::Default().IsSupported())
{
auto channelOperation{ PushNotificationManager::Default().CreateChannelAsync(c_azureRemoteId) };
VERIFY_SUCCEEDED(ChannelRequestHelper(channelOperation));
const HRESULT hr{ ChannelRequestHelper(channelOperation) };
if (FAILED(hr) && SkipIfWnsServiceError(hr, L"ChannelRequestUsingRemoteId"))
{
return;
}
VERIFY_SUCCEEDED(hr);
}
else
{
Expand All @@ -128,7 +153,12 @@ void BaseTestSuite::ChannelRequestCheckExpirationTime()
if (PushNotificationManager::Default().IsSupported())
{
auto channelOperation{ PushNotificationManager::Default().CreateChannelAsync(c_azureRemoteId) };
VERIFY_SUCCEEDED(ChannelRequestHelper(channelOperation));
const HRESULT hr{ ChannelRequestHelper(channelOperation) };
if (FAILED(hr) && SkipIfWnsServiceError(hr, L"ChannelRequestCheckExpirationTime"))
{
return;
}
VERIFY_SUCCEEDED(hr);

auto channel{ channelOperation.GetResults().Channel() };
auto expirationTime{ channel.ExpirationTime() };
Expand Down
7 changes: 6 additions & 1 deletion test/inc/WindowsAppRuntime.Test.Bootstrap.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,12 @@ namespace Test::Bootstrap
TP::WindowsAppRuntimeMain::c_PackageNamePrefix));
}

VERIFY_SUCCEEDED(MddBootstrapInitialize(version_MajorMinor, nullptr, minVersion));
// AddPackage now waits for FindPackageForUser to surface each registered
// package before returning, so MddBootstrapInitialize -> ResolvePackageDependency
// no longer races the OS package index. Call once and verify; any failure
// here is a real bug, not the historical 0x80270254 enumeration-lag race.
const HRESULT bootstrapHr{ MddBootstrapInitialize(version_MajorMinor, nullptr, minVersion) };
VERIFY_SUCCEEDED(bootstrapHr);
Comment thread
kythant marked this conversation as resolved.
Outdated
s_bootstrapDll = std::move(bootstrapDll);
}

Expand Down
137 changes: 136 additions & 1 deletion test/inc/WindowsAppRuntime.Test.Package.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@

#include <appmodel.h>

#include <algorithm>
#include <string>
#include <vector>

#include <WindowsAppRuntime.Test.FileSystem.h>
#include <winrt/Windows.Management.Deployment.h>
#include <winrt/Windows.ApplicationModel.h>
Expand Down Expand Up @@ -312,14 +316,145 @@ inline winrt::Windows::Foundation::Uri GetAppxManifestPackageUri(PCWSTR packageF
return winrt::Windows::Foundation::Uri{ path.c_str() };
}

inline void WaitForPackageEnumerable(PCWSTR packageFullName)
{
// After AddPackageAsync's async operation completes, the OS-side
// PackageManager index can lag briefly before the just-registered
// package becomes visible to family-scoped enumeration AND its on-disk
// state reports Status.VerifyIsOK(). MddBootstrapInitialize ->
// PackageDeploymentResolver::Find resolves the DDLM via exactly that
// path (FindPackagesForUserWithPackageTypes + Status.VerifyIsOK), so a
// FindPackageForUser-by-full-name poll uses the wrong cache and returns
// too early. Mirror the resolver's enumeration here so AddPackage only
// returns once the OS will satisfy MddBootstrapInitialize.
//
// PackageFullName format: <Name>_<Version>_<Architecture>_<ResourceId>_<PublisherId>
// FamilyName format: <Name>_<PublisherId> (parts[0] + "_" + parts[4])
std::wstring fullName{ packageFullName };
std::vector<std::wstring> parts;
{
size_t start{ 0 };
for (size_t i{ 0 }; i <= fullName.size(); ++i)
{
if (i == fullName.size() || fullName[i] == L'_')
{
parts.emplace_back(fullName.substr(start, i - start));
start = i + 1;
}
}
}
if (parts.size() < 5)
{
WEX::Logging::Log::Warning(WEX::Common::String().Format(
L"WaitForPackageEnumerable('%s'): unparseable full name (parts=%zu); skipping wait",
packageFullName, parts.size()));
return;
}
const winrt::hstring familyName{ parts[0] + L"_" + parts[4] };
const winrt::hstring fullNameH{ packageFullName };

winrt::Windows::Management::Deployment::PackageManager packageManager;
const auto packageTypes{
winrt::Windows::Management::Deployment::PackageTypes::Framework |
winrt::Windows::Management::Deployment::PackageTypes::Main };

constexpr DWORD c_pollIntervalMs{ 100 };
constexpr DWORD c_timeoutMs{ 30000 };
const DWORD startTick{ GetTickCount() };
for (;;)
{
bool found{ false };
bool statusOk{ false };
try
{
auto packages{ packageManager.FindPackagesForUserWithPackageTypes(winrt::hstring{}, familyName, packageTypes) };
if (packages)
{
for (const auto& candidate : packages)
{
if (candidate.Id().FullName() == fullNameH)
{
found = true;
statusOk = candidate.Status().VerifyIsOK();
break;
}
}
}
}
catch (...)
{
// PackageManager occasionally throws transient access errors
// during the index-update window; treat as not-yet-visible.
}
if (found && statusOk)
{
return;
}
const DWORD elapsed{ GetTickCount() - startTick };
if (elapsed >= c_timeoutMs)
{
WEX::Logging::Log::Warning(WEX::Common::String().Format(
L"WaitForPackageEnumerable('%s', family='%s') timed out after %u ms (found=%d statusOk=%d); downstream bootstrap may race",
packageFullName, familyName.c_str(), elapsed, found ? 1 : 0, statusOk ? 1 : 0));
return;
}
Sleep(c_pollIntervalMs);
}
}

inline void AddPackage(PCWSTR packageDirName, PCWSTR packageFullName)
{
auto msixUri{ GetMsixPackageUri(packageDirName) };

winrt::Windows::Management::Deployment::PackageManager packageManager;
auto options{ winrt::Windows::Management::Deployment::DeploymentOptions::None };
auto deploymentResult{ packageManager.AddPackageAsync(msixUri, nullptr, options).get() };

// AddPackageAsync intermittently fails on the test agents with transient
// deployment errors (most often 0x80073D02 ERROR_INSTALL_RESOURCES_BUSY)
// when the previous test's package teardown hasn't fully released file
// handles. There's no precondition we can poll for here (the deployment
// service holds an internal lock); the documented mitigation is to back
// off and reissue. Bounded to 5 attempts so a genuine non-transient
// failure still surfaces quickly.
winrt::Windows::Management::Deployment::DeploymentResult deploymentResult{ nullptr };
constexpr int c_maxAttempts{ 5 };
DWORD backoffMs{ 1000 };
for (int attempt{ 1 }; attempt <= c_maxAttempts; ++attempt)
{
deploymentResult = packageManager.AddPackageAsync(msixUri, nullptr, options).get();
const HRESULT hr{ deploymentResult.ExtendedErrorCode() };
if (SUCCEEDED(hr))
{
if (attempt > 1)
{
WEX::Logging::Log::Comment(WEX::Common::String().Format(
L"AddPackageAsync('%s') succeeded on attempt %d", packageFullName, attempt));
}
break;
}
// ERROR_INSTALL_RESOURCES_BUSY (0x3D02) / ERROR_INSTALL_OPEN_PACKAGE_FAILED (0x3CFF)
// symbols aren't visible in this header's translation units; pass raw win32 codes
// through HRESULT_FROM_WIN32 (always-available macro) instead.
const bool isTransient{
hr == HRESULT_FROM_WIN32(0x3D02) || // ERROR_INSTALL_RESOURCES_BUSY
Copy link
Copy Markdown
Contributor

@alexlamtest alexlamtest Jun 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Why not say ERROR_INSTALL_RESOURCES_BUSY instead, like below?

hr == HRESULT_FROM_WIN32(0x3CFF) || // ERROR_INSTALL_OPEN_PACKAGE_FAILED
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: why not say ERROR_INSTALL_OPEN_PACKAGE_FAILED instead, like below?

hr == HRESULT_FROM_WIN32(ERROR_SHARING_VIOLATION) }; // 0x80070020
if (!isTransient || attempt == c_maxAttempts)
{
break;
}
WEX::Logging::Log::Comment(WEX::Common::String().Format(
L"AddPackageAsync('%s') attempt %d/%d failed with transient HRESULT 0x%08X %s; sleeping %u ms before retry",
packageFullName, attempt, c_maxAttempts, hr, deploymentResult.ErrorText().c_str(), backoffMs));
Sleep(backoffMs);
backoffMs = (std::min<DWORD>)(backoffMs * 2, 8000);
}
VERIFY_SUCCEEDED(deploymentResult.ExtendedErrorCode(), WEX::Common::String().Format(L"AddPackageAsync('%s') = 0x%0X %s", packageFullName, deploymentResult.ExtendedErrorCode(), deploymentResult.ErrorText().c_str()));

// Wait for the deployment to be visible to FindPackageForUser before
// returning so callers (notably MddBootstrapInitialize) don't race the
// OS package index.
WaitForPackageEnumerable(packageFullName);
}

inline void AddPackageDefer(PCWSTR packageDirName, PCWSTR packageFullName)
Expand Down