From 4f85095258cbc2b658b44b37bb59d7030e4c55f8 Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Tue, 23 Sep 2025 13:07:12 +0000 Subject: [PATCH 01/26] Fixed init of valid_kernels to take GetAlphaBetaCase into account --- ...ip_implicit_gemm_3d_grouped_bwd_xdlops.cpp | 29 ++++++++------- ...ip_implicit_gemm_3d_grouped_fwd_xdlops.cpp | 35 ++++++++++--------- ...ip_implicit_gemm_3d_grouped_wrw_xdlops.cpp | 35 ++++++++++--------- 3 files changed, 50 insertions(+), 49 deletions(-) diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp index 38be1ccb453d..5e754b854011 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp @@ -349,28 +349,28 @@ struct CKArgs std::array lPadding; std::array rPadding; }; -} // namespace template -void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::Init(const ProblemDescription& problem) +std::vector FillValidKernelsByAlphaBeta(const ProblemDescription& problem) { switch(problem.GetAlphaBetaCase()) { case BILINEAR: - valid_kernels = - FillValidKernelsIDs, CKArgs>(problem); - break; + return FillValidKernelsIDs, CKArgs>(problem); case SCALE: - valid_kernels = - FillValidKernelsIDs, CKArgs>(problem); - break; + return FillValidKernelsIDs, CKArgs>(problem); default: - valid_kernels = - FillValidKernelsIDs, CKArgs>(problem); - break; + return FillValidKernelsIDs, CKArgs>(problem); } - index = 0; - kernel_id = valid_kernels[index]; +} +} // namespace + +template +void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::Init(const ProblemDescription& problem) +{ + valid_kernels = FillValidKernelsByAlphaBeta(problem); + index = 0; + kernel_id = valid_kernels[index]; } template @@ -441,8 +441,7 @@ void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::HeuristicInit( using T = decltype(CKDataType); auto fill_valid_kernels = [=](const miopen::conv::ProblemDescription& problem) -> std::vector { - return miopen::solver::FillValidKernelsIDs, CKArgs>( - problem); + return FillValidKernelsByAlphaBeta(problem); }; return miopen::solver::conv::RunParameterPredictionModel(ctx, problem, diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index cac22d0258b4..dae3fed5aeae 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -345,28 +345,30 @@ struct CKArgs miopenAlphaBetaCase_t alpha_beta_case; }; -} // namespace - template -void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::Init(const ProblemDescription& problem) +std::vector FillValidKernelsByAlphaBeta(const ProblemDescription& problem) { switch(problem.GetAlphaBetaCase()) { case BILINEAR: - valid_kernels = - FillValidKernelsIDs, CKArgs>(problem); - break; + return miopen::solver::FillValidKernelsIDs, + CKArgs>(problem); case SCALE: - valid_kernels = - FillValidKernelsIDs, CKArgs>(problem); - break; + return miopen::solver::FillValidKernelsIDs, + CKArgs>(problem); default: - valid_kernels = - FillValidKernelsIDs, CKArgs>(problem); - break; + return miopen::solver::FillValidKernelsIDs, + CKArgs>(problem); } - index = 0; - kernel_id = valid_kernels[index]; +} +} // namespace + +template +void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::Init(const ProblemDescription& problem) +{ + valid_kernels = FillValidKernelsByAlphaBeta(problem); + index = 0; + kernel_id = valid_kernels[index]; } template @@ -561,10 +563,10 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::HeuristicInit( auto run_ai_heuristics = [&](auto CKDataType) { using T = decltype(CKDataType); + MIOPEN_LOG_I2("problem.GetAlphaBetaCase(): " << problem.GetAlphaBetaCase()); auto fill_valid_kernels = [=](const miopen::conv::ProblemDescription& problem) -> std::vector { - return miopen::solver::FillValidKernelsIDs, CKArgs>( - problem); + return FillValidKernelsByAlphaBeta(problem); }; return miopen::solver::conv::RunParameterPredictionModel(ctx, problem, @@ -585,7 +587,6 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::HeuristicInit( if(ai_success) { MIOPEN_LOG_I("Step 3: AI heuristics selected kernel: " << kernel_id); - return; } else { diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp index c4125eea29b6..795dfe4242bd 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp @@ -305,30 +305,32 @@ struct CKArgs std::array lPadding; std::array rPadding; }; -} // namespace template -void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::Init(const ProblemDescription& problem) +std::vector FillValidKernelsByAlphaBeta(const ProblemDescription& problem) { switch(problem.GetAlphaBetaCase()) { case BILINEAR: - valid_kernels = - FillValidKernelsIDs, CKArgs>( - problem); - break; + return FillValidKernelsIDs, CKArgs>( + problem); case SCALE: - valid_kernels = - FillValidKernelsIDs, CKArgs>(problem); - break; + return FillValidKernelsIDs, CKArgs>( + problem); default: - valid_kernels = - FillValidKernelsIDs, CKArgs>(problem); - break; + return FillValidKernelsIDs, CKArgs>( + problem); } - index = 0; - split_k = 1; - kernel_id = valid_kernels[index] + "+" + std::to_string(split_k); +} +} // namespace + +template +void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::Init(const ProblemDescription& problem) +{ + valid_kernels = FillValidKernelsByAlphaBeta(problem); + index = 0; + split_k = 1; + kernel_id = valid_kernels[index] + "+" + std::to_string(split_k); } template @@ -400,8 +402,7 @@ void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::HeuristicInit( using T = decltype(CKDataType); auto fill_valid_kernels = [=](const miopen::conv::ProblemDescription& problem) -> std::vector { - return miopen::solver::FillValidKernelsIDs, - CKArgs>(problem); + return FillValidKernelsByAlphaBeta(problem); }; return miopen::solver::conv::RunParameterPredictionModel(ctx, problem, From fda19b77c54664c0d50b8e2af59ea5780a3010b3 Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Tue, 23 Sep 2025 13:45:18 +0000 Subject: [PATCH 02/26] removed debugging log message --- .../solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index dae3fed5aeae..6f9cb6efa425 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -563,7 +563,6 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::HeuristicInit( auto run_ai_heuristics = [&](auto CKDataType) { using T = decltype(CKDataType); - MIOPEN_LOG_I2("problem.GetAlphaBetaCase(): " << problem.GetAlphaBetaCase()); auto fill_valid_kernels = [=](const miopen::conv::ProblemDescription& problem) -> std::vector { return FillValidKernelsByAlphaBeta(problem); From d822e6b364c43aa702a89ea381862be7873eac69 Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Tue, 23 Sep 2025 15:16:12 +0000 Subject: [PATCH 03/26] removed tests for override functionality of conv 3d fwd solver. Env var cannot be set dynamically in tests. --- ...config_HipImplicitGemm3DGroupFwdXdlops.cpp | 161 ------------------ 1 file changed, 161 deletions(-) diff --git a/projects/miopen/test/gtest/perf_config_HipImplicitGemm3DGroupFwdXdlops.cpp b/projects/miopen/test/gtest/perf_config_HipImplicitGemm3DGroupFwdXdlops.cpp index 0684148a47e0..8dd6571996eb 100644 --- a/projects/miopen/test/gtest/perf_config_HipImplicitGemm3DGroupFwdXdlops.cpp +++ b/projects/miopen/test/gtest/perf_config_HipImplicitGemm3DGroupFwdXdlops.cpp @@ -21,30 +21,6 @@ struct PerfConfigTestCase std::string arch; }; -// helper functions to set and unset environment variables in a cross-platform way -// if required by other tests, these can be moved to a common utility file -#if defined(_WIN32) -#include -inline void set_env_var(const char* name, const char* value) -{ - SetEnvironmentVariableA(name, value); -} -inline void unset_env_var(const char* name) -{ - SetEnvironmentVariableA(name, nullptr); -} -#else -#include -inline void set_env_var(const char* name, const char* value) -{ - setenv(name, value, 1); -} -inline void unset_env_var(const char* name) -{ - unsetenv(name); -} -#endif - std::vector GetPerfConfigTestCases(miopenDataType_t data_type, std::string arch) { return {{{1, 128, 64, 32, {3, 28, 28}, {3, 3, 3}, {0, 0, 0}, {1, 1, 1}, {1, 1, 1}}, @@ -92,134 +68,6 @@ class PerfConfig_HipImplicitGemm3DGroupFwdXdlops cfg.HeuristicInit(ctx, problem); EXPECT_TRUE(cfg.index != 0) << "index is 0:" << test_case.conv; } - - void TestOverrideEnvVar() - { - auto test_case = GetParam(); - - auto&& handle = get_handle(); - miopen::ExecutionContext ctx(&handle); - if(test_case.arch != ctx.GetStream().GetDeviceName()) - GTEST_SKIP(); - - auto input_tensor_desc = - miopen::TensorDescriptor(test_case.data_type, test_case.conv.GetInput()); - - auto weights_tensor_desc = miopen::TensorDescriptor( - test_case.data_type, test_case.layout, test_case.conv.GetWeights()); - - auto conv_desc = test_case.conv.GetConv(); - - auto output_desc = conv_desc.GetForwardOutputTensor( - input_tensor_desc, weights_tensor_desc, test_case.data_type); - - auto problem = miopen::conv::ProblemDescription(input_tensor_desc, - weights_tensor_desc, - output_desc, - conv_desc, - miopen::conv::Direction::Forward); - - // Check if hardcoded heuristics conditions are met - bool will_use_hardcoded = - (test_case.data_type == miopenBFloat16 || test_case.data_type == miopenHalf) && - problem.GetInChannels() > 8 && problem.GetGroupCount() == 1 && - problem.GetAlphaBetaCase() == DEFAULT; - - // Test override with value 1 (should select index 1 if hardcoded heuristics don't trigger) - { - set_env_var("MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_IDX_OVERRIDE", "1"); - - Config cfg; - cfg.HeuristicInit(ctx, problem); - - if(will_use_hardcoded) - { - // When hardcoded heuristics trigger, they override the simple index setting - EXPECT_TRUE(cfg.index != 1) - << "Hardcoded heuristics should override simple index setting"; - EXPECT_TRUE(!cfg.kernel_id.empty()) - << "Should have selected a kernel via hardcoded heuristics"; - } - else - { - // When hardcoded heuristics don't trigger, simple index override should work - if(!cfg.valid_kernels.empty() && cfg.valid_kernels.size() > 1) - { - EXPECT_EQ(cfg.index, 1) << "Override should set index to 1"; - EXPECT_EQ(cfg.kernel_id, cfg.valid_kernels[1]) - << "kernel_id should match valid_kernels[1]"; - } - } - - unset_env_var("MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_IDX_OVERRIDE"); - } - - // Test hardcoded heuristics for BF16/FP16 with appropriate conditions - if(will_use_hardcoded) - { - set_env_var("MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_IDX_OVERRIDE", "0"); - - Config cfg; - cfg.HeuristicInit(ctx, problem); - - // Verify that hardcoded heuristics were applied - EXPECT_TRUE(!cfg.kernel_id.empty()) << "Hardcoded heuristics should select a kernel"; - - // Verify the kernel selected is reasonable (contains expected patterns) - bool has_expected_pattern = - cfg.kernel_id.find("DeviceGroupedConvFwdMultipleABD_Xdl_CShuffle_V3") != - std::string::npos && - (cfg.kernel_id.find("BlkGemmPipelineScheduler: Intrawave") != std::string::npos || - cfg.kernel_id.find("BlkGemmPipelineScheduler: Interwave") != std::string::npos); - - EXPECT_TRUE(has_expected_pattern) - << "Selected kernel should match hardcoded heuristic pattern: " << cfg.kernel_id; - - unset_env_var("MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_IDX_OVERRIDE"); - } - - // Test that override affects kernel selection by using a high index value - // that won't trigger hardcoded heuristics - { - // First get normal result (without override) - Config cfg_normal; - cfg_normal.HeuristicInit(ctx, problem); - - // Use a high index that's less likely to trigger hardcoded heuristics - // but still tests the override functionality - set_env_var("MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_IDX_OVERRIDE", "50"); - - Config cfg_override; - cfg_override.HeuristicInit(ctx, problem); - - if(will_use_hardcoded) - { - // Even with index 50, hardcoded heuristics should still trigger - // for BF16/FP16 with the right conditions - EXPECT_TRUE(!cfg_override.kernel_id.empty()) << "Should select a kernel"; - // Both normal and override will use hardcoded heuristics, so they might be the same - // The important thing is that the override path was taken - } - else - { - // For other cases, the override should work normally - if(cfg_override.valid_kernels.size() > 50) - { - EXPECT_EQ(cfg_override.index, 50) << "Override should select index 50"; - EXPECT_NE(cfg_override.kernel_id, cfg_normal.kernel_id) - << "Override should select different kernel than normal path"; - } - else - { - // If there aren't enough kernels, it should fall back to index 0 - EXPECT_EQ(cfg_override.index, 0) - << "Should fall back to index 0 when override index is out of range"; - } - } - - unset_env_var("MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_IDX_OVERRIDE"); - } - } }; using GPU_PerfConfig_HipImplicitGemm3DGroupFwdXdlops_BFP16 = @@ -230,15 +78,6 @@ using GPU_PerfConfig_HipImplicitGemm3DGroupFwdXdlops_FP16 = TEST_P(GPU_PerfConfig_HipImplicitGemm3DGroupFwdXdlops_BFP16, All) { TestConfigs(); } TEST_P(GPU_PerfConfig_HipImplicitGemm3DGroupFwdXdlops_FP16, All) { TestConfigs(); } -TEST_P(GPU_PerfConfig_HipImplicitGemm3DGroupFwdXdlops_BFP16, OverrideEnvVar) -{ - TestOverrideEnvVar(); -} -TEST_P(GPU_PerfConfig_HipImplicitGemm3DGroupFwdXdlops_FP16, OverrideEnvVar) -{ - TestOverrideEnvVar(); -} - INSTANTIATE_TEST_SUITE_P(Full, GPU_PerfConfig_HipImplicitGemm3DGroupFwdXdlops_BFP16, testing::ValuesIn(GetPerfConfigTestCases(miopenBFloat16, "gfx942"))); From 254a61c606609948526791e15e13c6213a41a746 Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Wed, 24 Sep 2025 08:47:55 +0000 Subject: [PATCH 04/26] undid errors introduced by merge --- ...ip_implicit_gemm_3d_grouped_bwd_xdlops.cpp | 44 +++++++++------- ...ip_implicit_gemm_3d_grouped_fwd_xdlops.cpp | 51 ++++++++++--------- ...ip_implicit_gemm_3d_grouped_wrw_xdlops.cpp | 49 +++++++++++++----- 3 files changed, 90 insertions(+), 54 deletions(-) diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp index 68c5bc40a4e2..5e754b854011 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp @@ -37,10 +37,14 @@ #include #include #include +#include +#include +#include #endif #include MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS_AI_HEUR) namespace miopen { namespace solver { @@ -180,21 +184,21 @@ struct CKArgs } filter_strides = {ProblemInterpreter::GetAdjustedConvolutionStrideD(problem), - ProblemInterpreter::GetAdjustedConvolutionStrideH(problem), - ProblemInterpreter::GetAdjustedConvolutionStrideW(problem)}; + ProblemInterpreter::GetAdjustedConvolutionStrideH(problem), + ProblemInterpreter::GetAdjustedConvolutionStrideW(problem)}; filter_dilations = {ProblemInterpreter::GetAdjustedConvolutionDilationD(problem), ProblemInterpreter::GetAdjustedConvolutionDilationH(problem), ProblemInterpreter::GetAdjustedConvolutionDilationW(problem)}; lPadding = {ProblemInterpreter::GetInputLeftPadD(problem), - ProblemInterpreter::GetInputLeftPadH(problem), - ProblemInterpreter::GetInputLeftPadW(problem)}; + ProblemInterpreter::GetInputLeftPadH(problem), + ProblemInterpreter::GetInputLeftPadW(problem)}; rPadding = {ProblemInterpreter::GetAdjustedInputRightPadD(problem), - ProblemInterpreter::GetAdjustedInputRightPadH(problem), - ProblemInterpreter::GetAdjustedInputRightPadW(problem)}; + ProblemInterpreter::GetAdjustedInputRightPadH(problem), + ProblemInterpreter::GetAdjustedInputRightPadW(problem)}; } - CKArgs(const CKArgs&) = default; - CKArgs(CKArgs&&) = default; + CKArgs(const CKArgs&) = default; + CKArgs(CKArgs&&) = default; CKArgs& operator=(const CKArgs&) = default; template @@ -399,15 +403,10 @@ bool ConvHipImplicitGemm3DGroupBwdXdlops::CheckCKApplicability( default: return IsCKApplicable, CKArgs>(problem); } } -#endif -void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::HeuristicInit( - [[maybe_unused]] const ProblemDescription& problem) +void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::InitValidKernels( + const ProblemDescription& problem) { - index = 0; - kernel_id = ""; - -#if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL switch(problem.GetInDataType()) { case miopenHalf: Init(problem); break; @@ -502,7 +501,8 @@ bool PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::SetNextValue( { if(valid_kernels.empty()) { - HeuristicInit(problem); + // For generic search, we want all available kernels, not heuristic selection + InitValidKernels(problem); assert(!valid_kernels.empty()); return true; } @@ -549,10 +549,10 @@ bool PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::operator==( PerformanceConfigHipImplicitGemm3DGroupBwdXdlops ConvHipImplicitGemm3DGroupBwdXdlops::GetDefaultPerformanceConfig( - const ExecutionContext&, const ProblemDescription& problem) const + const ExecutionContext& ctx, const ProblemDescription& problem) const { PerformanceConfigHipImplicitGemm3DGroupBwdXdlops pp; - pp.HeuristicInit(problem); + pp.HeuristicInit(ctx, problem); return pp; } @@ -684,6 +684,14 @@ ConvSolution ConvHipImplicitGemm3DGroupBwdXdlops::GetSolution( #endif } +#if !(MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL) +void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::InitValidKernels( + const miopen::conv::ProblemDescription&) +{ + // No-op stub for non-CK builds +} +#endif + } // namespace conv } // namespace solver } // namespace miopen diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index f66bf7979676..6f9cb6efa425 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -26,7 +26,7 @@ #include #include - +#include #include #include #include @@ -37,10 +37,14 @@ #include #include #include "ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp" +#include +#include +#include #endif #include MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS) MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_IDX_OVERRIDE); +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_AI_HEUR) namespace miopen { namespace solver { @@ -174,21 +178,21 @@ struct CKArgs } filter_strides = {ProblemInterpreter::GetAdjustedConvolutionStrideD(problem), - ProblemInterpreter::GetAdjustedConvolutionStrideH(problem), - ProblemInterpreter::GetAdjustedConvolutionStrideW(problem)}; + ProblemInterpreter::GetAdjustedConvolutionStrideH(problem), + ProblemInterpreter::GetAdjustedConvolutionStrideW(problem)}; filter_dilations = {ProblemInterpreter::GetAdjustedConvolutionDilationD(problem), ProblemInterpreter::GetAdjustedConvolutionDilationH(problem), ProblemInterpreter::GetAdjustedConvolutionDilationW(problem)}; lPadding = {ProblemInterpreter::GetInputLeftPadD(problem), - ProblemInterpreter::GetInputLeftPadH(problem), - ProblemInterpreter::GetInputLeftPadW(problem)}; + ProblemInterpreter::GetInputLeftPadH(problem), + ProblemInterpreter::GetInputLeftPadW(problem)}; rPadding = {ProblemInterpreter::GetAdjustedInputRightPadD(problem), - ProblemInterpreter::GetAdjustedInputRightPadH(problem), - ProblemInterpreter::GetAdjustedInputRightPadW(problem)}; + ProblemInterpreter::GetAdjustedInputRightPadH(problem), + ProblemInterpreter::GetAdjustedInputRightPadW(problem)}; } - CKArgs(const CKArgs&) = default; - CKArgs(CKArgs&&) noexcept = default; + CKArgs(const CKArgs&) = default; + CKArgs(CKArgs&&) noexcept = default; CKArgs& operator=(const CKArgs&) = default; template @@ -397,15 +401,10 @@ bool ConvHipImplicitGemm3DGroupFwdXdlops::CheckCKApplicability( default: return IsCKApplicable, CKArgs>(problem); } } -#endif -void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::HeuristicInit( - [[maybe_unused]] const ProblemDescription& problem) +void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::InitValidKernels( + const ProblemDescription& problem) { - index = 0; - kernel_id = ""; - -#if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL switch(problem.GetInDataType()) { case miopenHalf: Init(problem); break; @@ -624,13 +623,9 @@ bool PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::SetNextValue( { if(valid_kernels.empty()) { - HeuristicInit(problem); + // For generic search, we want all available kernels, not heuristic selection + InitValidKernels(problem); assert(!valid_kernels.empty()); - if(index != 0) - { - index = 0; - kernel_id = valid_kernels[index]; - } return true; } if((index + 1) < valid_kernels.size()) @@ -676,10 +671,10 @@ bool PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::operator==( PerformanceConfigHipImplicitGemm3DGroupFwdXdlops ConvHipImplicitGemm3DGroupFwdXdlops::GetDefaultPerformanceConfig( - const ExecutionContext&, const ProblemDescription& problem) const + const ExecutionContext& ctx, const ProblemDescription& problem) const { PerformanceConfigHipImplicitGemm3DGroupFwdXdlops pp; - pp.HeuristicInit(problem); + pp.HeuristicInit(ctx, problem); return pp; } @@ -839,6 +834,14 @@ ConvSolution ConvHipImplicitGemm3DGroupFwdXdlops::GetSolution( #endif } +#if !(MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL) +void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::InitValidKernels( + const miopen::conv::ProblemDescription&) +{ + // No-op stub for non-CK builds +} +#endif + } // namespace conv } // namespace solver } // namespace miopen diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp index 4e7ed5f0e944..795dfe4242bd 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp @@ -34,10 +34,14 @@ #include #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL #include +#include +#include +#include #endif #include #include MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_WRW_XDLOPS) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_WRW_XDLOPS_AI_HEUR) namespace miopen { namespace solver { @@ -109,20 +113,20 @@ struct CKArgs } filter_strides = {ProblemInterpreter::GetAdjustedConvolutionStrideD(problem), - ProblemInterpreter::GetAdjustedConvolutionStrideH(problem), - ProblemInterpreter::GetAdjustedConvolutionStrideW(problem)}; + ProblemInterpreter::GetAdjustedConvolutionStrideH(problem), + ProblemInterpreter::GetAdjustedConvolutionStrideW(problem)}; filter_dilations = {ProblemInterpreter::GetAdjustedConvolutionDilationD(problem), ProblemInterpreter::GetAdjustedConvolutionDilationH(problem), ProblemInterpreter::GetAdjustedConvolutionDilationW(problem)}; lPadding = {ProblemInterpreter::GetInputLeftPadD(problem), - ProblemInterpreter::GetInputLeftPadH(problem), - ProblemInterpreter::GetInputLeftPadW(problem)}; + ProblemInterpreter::GetInputLeftPadH(problem), + ProblemInterpreter::GetInputLeftPadW(problem)}; rPadding = {ProblemInterpreter::GetAdjustedInputRightPadD(problem), - ProblemInterpreter::GetAdjustedInputRightPadH(problem), - ProblemInterpreter::GetAdjustedInputRightPadW(problem)}; + ProblemInterpreter::GetAdjustedInputRightPadH(problem), + ProblemInterpreter::GetAdjustedInputRightPadW(problem)}; } - CKArgs(const CKArgs&) = default; - CKArgs(CKArgs&&) = default; + CKArgs(const CKArgs&) = default; + CKArgs(CKArgs&&) = default; CKArgs& operator=(const CKArgs&) = default; template @@ -361,10 +365,22 @@ bool ConvHipImplicitGemm3DGroupWrwXdlops::CheckCKApplicability( return IsCKApplicable, CKArgs>(problem); } } +void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::InitValidKernels( + const ProblemDescription& problem) +{ + switch(problem.GetInDataType()) + { + case miopenHalf: Init(problem); break; + case miopenFloat: Init(problem); break; + case miopenInt8: Init(problem); break; + case miopenBFloat16: Init(problem); break; + default: break; // Unsupported data types - valid_kernels remains empty + } +} #endif void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::HeuristicInit( - [[maybe_unused]] const ProblemDescription& problem) + const miopen::ExecutionContext& ctx, const ProblemDescription& problem) { index = 0; split_k = 1; @@ -448,7 +464,8 @@ bool PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::SetNextValue( #if MIOPEN_USE_COMPOSABLEKERNEL if(valid_kernels.empty()) { - HeuristicInit(problem); + // For generic search, we want all available kernels, not heuristic selection + InitValidKernels(problem); if(valid_kernels.empty()) { return false; @@ -508,10 +525,10 @@ bool PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::operator==( PerformanceConfigHipImplicitGemm3DGroupWrwXdlops ConvHipImplicitGemm3DGroupWrwXdlops::GetDefaultPerformanceConfig( - const ExecutionContext&, const ProblemDescription& problem) const + const ExecutionContext& ctx, const ProblemDescription& problem) const { PerformanceConfigHipImplicitGemm3DGroupWrwXdlops pp; - pp.HeuristicInit(problem); + pp.HeuristicInit(ctx, problem); return pp; } @@ -646,6 +663,14 @@ ConvSolution ConvHipImplicitGemm3DGroupWrwXdlops::GetSolution( #endif } +#if !(MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL) +void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::InitValidKernels( + const miopen::conv::ProblemDescription&) +{ + // No-op stub for non-CK builds +} +#endif + } // namespace conv } // namespace solver } // namespace miopen From 150b4de58e73fa72e5d5243f3cc5443b3f0e9049 Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Thu, 25 Sep 2025 07:50:36 +0000 Subject: [PATCH 05/26] Implemented Reid's suggestions --- .../heuristics/ai_candidate_selection.cpp | 47 +++++++++++++++++-- .../ai_conv_3d_kernel_tuning_utils.cpp | 28 +++++++---- .../gtest/conv_ai_3d_kernel_tuning_utils.cpp | 6 +-- 3 files changed, 65 insertions(+), 16 deletions(-) diff --git a/projects/miopen/src/conv/heuristics/ai_candidate_selection.cpp b/projects/miopen/src/conv/heuristics/ai_candidate_selection.cpp index affc688c0fd4..54c123f8dfd1 100644 --- a/projects/miopen/src/conv/heuristics/ai_candidate_selection.cpp +++ b/projects/miopen/src/conv/heuristics/ai_candidate_selection.cpp @@ -385,11 +385,52 @@ EncodeKernelParams(const std::vector>& valid_kernel_par // Build a map from param_name to value for this candidate std::map param_value_map; + bool mapping_valid = true; for(const auto& kv : kernel_str_mapping) { - size_t idx = std::stoi(kv.first); - if(idx < candidate.size()) - param_value_map[kv.second] = candidate[idx]; + try + { + // Use std::stoull for unsigned long long, then validate range + unsigned long long ull_idx = std::stoull(kv.first); + size_t idx = static_cast(ull_idx); + + if(idx < candidate.size()) + param_value_map[kv.second] = candidate[idx]; + else + { + MIOPEN_LOG_W("Index " << idx << " out of bounds for candidate of size " + << candidate.size() << " in kernel " << kernel_name); + mapping_valid = false; + break; + } + } + catch(const std::exception& ex) + { + MIOPEN_LOG_W("Invalid index format in kernel_str_mapping: " + << kv.first << ", error: " << ex.what()); + mapping_valid = false; + break; + } + } + + if(!mapping_valid) + { + // Skip this entire candidate rather than partial processing + // also give a clear log message about the candidate being skipped + std::ostringstream candidate_str; + candidate_str << "["; + for(size_t i = 0; i < candidate.size(); ++i) + { + if(i > 0) + candidate_str << ", "; + candidate_str << "\"" << candidate[i] << "\""; + } + candidate_str << "]"; + + MIOPEN_LOG_W("Skipping candidate due to invalid kernel string mapping. " + << "Kernel: " << kernel_name << ", Candidate: " << candidate_str.str() + << ", Total mappings: " << kernel_str_mapping.size()); + continue; // Continue to the next candidate } std::vector encoded; diff --git a/projects/miopen/src/conv/heuristics/ai_conv_3d_kernel_tuning_utils.cpp b/projects/miopen/src/conv/heuristics/ai_conv_3d_kernel_tuning_utils.cpp index 2a6e7ba7d172..b586d44cb112 100644 --- a/projects/miopen/src/conv/heuristics/ai_conv_3d_kernel_tuning_utils.cpp +++ b/projects/miopen/src/conv/heuristics/ai_conv_3d_kernel_tuning_utils.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #if MIOPEN_ENABLE_AI_KERNEL_TUNING namespace miopen { @@ -46,11 +47,11 @@ using ProblemDescription = miopen::conv::ProblemDescription; int LayoutStringToCode(const std::string& layout) { if(layout == "NCDHW") - return 0.0; + return 0; if(layout == "NDHWC") - return 1.0; + return 1; // Add more as needed - return -1.0; // Unknown + return -1; // Unknown } // Helper: Extract 3D convolution features @@ -137,9 +138,12 @@ std::vector GetKernelAsTokens(const std::string& kernel) auto lt_pos = kernel.find('<'); if(lt_pos != std::string::npos) { - // Add the entire prefix (before '<') as a single token - std::string prefix = kernel.substr(0, lt_pos); - prefix.erase(remove_if(prefix.begin(), prefix.end(), isspace), prefix.end()); + // Add the entire prefix (before '<') as a single token, removing whitespace + std::string prefix; + std::remove_copy_if(kernel.begin(), + kernel.begin() + lt_pos, + std::back_inserter(prefix), + [](char c) { return std::isspace(c); }); if(!prefix.empty()) tokens.push_back(prefix); @@ -152,9 +156,13 @@ std::vector GetKernelAsTokens(const std::string& kernel) std::string token; while(std::getline(ps, token, ',')) { - token.erase(remove_if(token.begin(), token.end(), isspace), token.end()); - if(!token.empty()) - tokens.push_back(token); + std::string clean_token; + std::remove_copy_if(token.begin(), + token.end(), + std::back_inserter(clean_token), + [](char c) { return std::isspace(c); }); + if(!clean_token.empty()) + tokens.push_back(clean_token); } } } @@ -227,7 +235,7 @@ bool RunParameterPredictionModel( std::map features = GetFeatures3D(problem, ctx.GetStream().GetMaxComputeUnits(), arch); - bool use_split_k = split_k == 1; + bool use_split_k = split_k != 0; if(split_k > 1) { MIOPEN_THROW("Invalid initial split_k value for performing AI Heuristics: " + diff --git a/projects/miopen/test/gtest/conv_ai_3d_kernel_tuning_utils.cpp b/projects/miopen/test/gtest/conv_ai_3d_kernel_tuning_utils.cpp index a93ec63f5747..c36b11f9adaf 100644 --- a/projects/miopen/test/gtest/conv_ai_3d_kernel_tuning_utils.cpp +++ b/projects/miopen/test/gtest/conv_ai_3d_kernel_tuning_utils.cpp @@ -47,10 +47,10 @@ namespace { int LayoutStringToCode(const std::string& layout) { if(layout == "NCDHW") - return 0.0; + return 0; if(layout == "NDHWC") - return 1.0; - return -1.0; // Unknown + return 1; + return -1; // Unknown } // Dummy kernels for testing From b726e64a8017ab3c334783a9948402713c9ec5ab Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Thu, 25 Sep 2025 09:43:35 +0000 Subject: [PATCH 06/26] fixed test function to follow more robust handling of erroneous kernel parameters --- .../test/gtest/conv_ai_candidate_selection_model.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/projects/miopen/test/gtest/conv_ai_candidate_selection_model.cpp b/projects/miopen/test/gtest/conv_ai_candidate_selection_model.cpp index bbbb5ffb467c..c38195f55e2e 100644 --- a/projects/miopen/test/gtest/conv_ai_candidate_selection_model.cpp +++ b/projects/miopen/test/gtest/conv_ai_candidate_selection_model.cpp @@ -224,7 +224,14 @@ TEST_P(CPU_CandidateSelection_NONE, EncodeKernelParamsBadValueThrows_Test) CandidateSelectionMetadata meta(params.arch, params.solver); std::vector> bad_params = { {params.kernel_name, "nonexistent_value", "nan"}}; - EXPECT_THROW(EncodeKernelParams(bad_params, meta), std::exception); + + // The function should not throw, but should return empty result due to invalid mapping + std::vector> result; + EXPECT_NO_THROW(result = EncodeKernelParams(bad_params, meta)); + + // Verify that the invalid candidate was skipped (empty result) + EXPECT_TRUE(result.empty()) + << "Expected empty result when all candidates have invalid mappings"; } TEST_P(CPU_CandidateSelection_NONE, SelectBestCandidateValid_Test) From 29fea9341732a6d0236a0c09b2a5bc12426b5c4d Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Wed, 1 Oct 2025 12:13:50 +0000 Subject: [PATCH 07/26] added missing return statement in fwd solver --- .../solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index 6f9cb6efa425..813412e8738d 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -586,6 +586,7 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::HeuristicInit( if(ai_success) { MIOPEN_LOG_I("Step 3: AI heuristics selected kernel: " << kernel_id); + return; } else { From 1e43e2a02ce8d5f3379657e7842bab6af63dc46c Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Wed, 1 Oct 2025 13:55:07 +0000 Subject: [PATCH 08/26] new way of handling CandidateSelectionResult --- .../heuristics/ai_candidate_selection.cpp | 55 +++++++++++-------- .../ai_conv_3d_kernel_tuning_utils.cpp | 28 ++++++---- .../heuristics/ai_candidate_selection.hpp | 27 +++++++-- .../conv_ai_candidate_selection_model.cpp | 26 +++++++-- 4 files changed, 93 insertions(+), 43 deletions(-) diff --git a/projects/miopen/src/conv/heuristics/ai_candidate_selection.cpp b/projects/miopen/src/conv/heuristics/ai_candidate_selection.cpp index 54c123f8dfd1..14293c7fad39 100644 --- a/projects/miopen/src/conv/heuristics/ai_candidate_selection.cpp +++ b/projects/miopen/src/conv/heuristics/ai_candidate_selection.cpp @@ -285,34 +285,40 @@ std::vector> CandidateSelectionModel::EncodeKernelConfigs( } MIOPEN_INTERNALS_EXPORT -int CandidateSelectionModel::SelectBestCandidateIdx( +std::vector> CandidateSelectionModel::SelectBestCandidateIndices( const std::vector& encoded_features, const std::vector>& encoded_configs) const { if(encoded_configs.empty() || encoded_features.empty()) { MIOPEN_THROW(miopenStatusInternalError, - "Empty features or configs in SelectBestCandidateIdx"); + "Empty features or configs in SelectBestCandidateIndices"); } size_t feature_dim = encoded_features.size(); size_t num_candidates = encoded_configs.size(); - std::vector selection_scores(num_candidates, 0.0f); + std::vector> scored_candidates; + scored_candidates.reserve(num_candidates); for(size_t i = 0; i < num_candidates; ++i) { if(encoded_configs[i].size() != feature_dim) MIOPEN_THROW(miopenStatusInternalError, - "Config dimension mismatch in SelectBestCandidateIdx"); - selection_scores[i] = std::inner_product( + "Config dimension mismatch in SelectBestCandidateIndices"); + + float score = std::inner_product( encoded_configs[i].begin(), encoded_configs[i].end(), encoded_features.begin(), 0.0f); + scored_candidates.emplace_back(static_cast(i), score); } - return static_cast(std::max_element(selection_scores.begin(), selection_scores.end()) - - selection_scores.begin()); -} + // Sort by score in descending order (best to worst) + std::sort(scored_candidates.begin(), scored_candidates.end(), [](const auto& a, const auto& b) { + return a.second > b.second; + }); + return scored_candidates; +} // --- Factory and Helper Functions ------------------------------------------- // Helper: Expand kernel params with split_k and keep mapping @@ -552,7 +558,6 @@ ModelSelectBestCandidate(const std::string& arch, if(use_split_k) { - // std::vector split_ks = GenerateSplitK(128); // TODO: make configurable // get split_k values from metadata const auto& split_ks = model.metadata().GetSplitKValues(); @@ -574,36 +579,42 @@ ModelSelectBestCandidate(const std::string& arch, if(encoded_candidates.empty()) { MIOPEN_LOG_W("No valid encoded candidates available"); - return CandidateSelectionResult{-1, 0}; + return CandidateSelectionResult{{}, {}}; } const auto& encoded_features = model.EncodeInputFeatures(features); const auto& encoded_configs = model.EncodeKernelConfigs(encoded_candidates); - const int best_idx = model.SelectBestCandidateIdx(encoded_features, encoded_configs); + // Get all candidates sorted by score (best to worst) + auto scored_candidates = + model.SelectBestCandidateIndices(encoded_features, encoded_configs); + ; - if(best_idx >= 0) - { - int original_index = mapping_pairs[best_idx].first; - int split_k_value = mapping_pairs[best_idx].second; - return CandidateSelectionResult{original_index, split_k_value}; - } - else + CandidateSelectionResult result; + result.kernel_indices.reserve(scored_candidates.size()); + result.split_k_values.reserve(scored_candidates.size()); + + for(const auto& [candidate_idx, score] : scored_candidates) { - MIOPEN_LOG_W("Invalid candidate index returned: " << best_idx); - return CandidateSelectionResult{-1, 0}; + if(candidate_idx >= 0 && candidate_idx < static_cast(mapping_pairs.size())) + { + result.kernel_indices.push_back(mapping_pairs[candidate_idx].first); + result.split_k_values.push_back(mapping_pairs[candidate_idx].second); + } } + + return result; } catch(const miopen::Exception& ex) { MIOPEN_LOG_I2("[Warning] Candidate selection model failed: " << ex.what()); - return CandidateSelectionResult{-1, 0}; + return CandidateSelectionResult{{}, {}}; } catch(const std::exception& ex) { MIOPEN_LOG_I2( "[Warning] Candidate selection model failed with std exception: " << ex.what()); - return CandidateSelectionResult{-1, 0}; + return CandidateSelectionResult{{}, {}}; } } diff --git a/projects/miopen/src/conv/heuristics/ai_conv_3d_kernel_tuning_utils.cpp b/projects/miopen/src/conv/heuristics/ai_conv_3d_kernel_tuning_utils.cpp index b586d44cb112..cb2d473bcd0f 100644 --- a/projects/miopen/src/conv/heuristics/ai_conv_3d_kernel_tuning_utils.cpp +++ b/projects/miopen/src/conv/heuristics/ai_conv_3d_kernel_tuning_utils.cpp @@ -245,19 +245,27 @@ bool RunParameterPredictionModel( auto result = ai::tuning::candidate_selection::ModelSelectBestCandidate( arch, solver_name, features, heuristic_kernels, use_split_k); - if(result.kernel_index >= 0 && result.kernel_index < static_cast(valid_kernels.size())) + // Check if we have any candidates + if(!result.IsEmpty()) { - index = result.kernel_index; - split_k = result.split_k; - if(use_split_k) - { - kernel_id = valid_kernels[index] + "+" + std::to_string(split_k); - } - else + // Get the best candidate (first in the sorted list) + int best_index = result.GetBestKernelIndex(); + int best_split_k = result.GetBestSplitK(); + + if(best_index >= 0 && best_index < static_cast(valid_kernels.size())) { - kernel_id = valid_kernels[index]; + index = best_index; + split_k = best_split_k; + if(use_split_k) + { + kernel_id = valid_kernels[index] + "+" + std::to_string(split_k); + } + else + { + kernel_id = valid_kernels[index]; + } + return true; } - return true; } MIOPEN_LOG_I("AI prediction returned invalid kernel index, falling back"); return false; diff --git a/projects/miopen/src/include/miopen/conv/heuristics/ai_candidate_selection.hpp b/projects/miopen/src/include/miopen/conv/heuristics/ai_candidate_selection.hpp index 6b8757abd869..3cf87b1e425b 100644 --- a/projects/miopen/src/include/miopen/conv/heuristics/ai_candidate_selection.hpp +++ b/projects/miopen/src/include/miopen/conv/heuristics/ai_candidate_selection.hpp @@ -103,9 +103,9 @@ class CandidateSelectionModel EncodeInputFeatures(const std::map& features) const; MIOPEN_INTERNALS_EXPORT std::vector> EncodeKernelConfigs(const std::vector>& encoded_candidates) const; - MIOPEN_INTERNALS_EXPORT int - SelectBestCandidateIdx(const std::vector& encoded_features, - const std::vector>& encoded_configs) const; + MIOPEN_INTERNALS_EXPORT std::vector> + SelectBestCandidateIndices(const std::vector& encoded_features, + const std::vector>& encoded_configs) const; const CandidateSelectionMetadata& metadata() const { return metadata_; } private: @@ -123,8 +123,25 @@ EncodeKernelParams(const std::vector>& valid_kernel_par struct CandidateSelectionResult { - int kernel_index; // Index of the original kernel in the input list - int split_k; // The selected split_k value + std::vector kernel_indices; // Sorted list of kernel indices (best to worst) + std::vector split_k_values; // Corresponding split_k values + + // Helper methods for backward compatibility and convenience + int GetBestKernelIndex() const { return kernel_indices.empty() ? -1 : kernel_indices[0]; } + int GetBestSplitK() const { return split_k_values.empty() ? 1 : split_k_values[0]; } + + int GetFallbackKernelIndex(size_t fallback_level = 1) const + { + return (fallback_level < kernel_indices.size()) ? kernel_indices[fallback_level] : -1; + } + + int GetFallbackSplitK(size_t fallback_level = 1) const + { + return (fallback_level < split_k_values.size()) ? split_k_values[fallback_level] : 1; + } + + size_t GetNumCandidates() const { return kernel_indices.size(); } + bool IsEmpty() const { return kernel_indices.empty(); } }; MIOPEN_INTERNALS_EXPORT diff --git a/projects/miopen/test/gtest/conv_ai_candidate_selection_model.cpp b/projects/miopen/test/gtest/conv_ai_candidate_selection_model.cpp index c38195f55e2e..33de83ff37fe 100644 --- a/projects/miopen/test/gtest/conv_ai_candidate_selection_model.cpp +++ b/projects/miopen/test/gtest/conv_ai_candidate_selection_model.cpp @@ -246,9 +246,17 @@ TEST_P(CPU_CandidateSelection_NONE, SelectBestCandidateValid_Test) auto valid_kernel_params = GenerateValidKernelParams(meta, params.kernel_name, 3); auto encoded_candidates = EncodeKernelParams(valid_kernel_params, meta); auto encoded_configs = model.EncodeKernelConfigs(encoded_candidates); - int idx = model.SelectBestCandidateIdx(encoded_features, encoded_configs); - ASSERT_GE(idx, 0); - ASSERT_LT(idx, static_cast(valid_kernel_params.size())); + std::vector> ids = + model.SelectBestCandidateIndices(encoded_features, encoded_configs); + ASSERT_FALSE(ids.empty()) << "No candidates were selected!"; + for(const auto& candidate : ids) + { + const int idx = candidate.first; + ASSERT_GE(idx, 0) << "Candidate index is negative!"; + ASSERT_LT(idx, static_cast(valid_kernel_params.size())) + << "Candidate index " << idx << " out of range [0, " << valid_kernel_params.size() - 1 + << "]"; + } } TEST_P(CPU_CandidateSelection_NONE, SelectBestCandidateEmptyInput_Test) @@ -257,7 +265,8 @@ TEST_P(CPU_CandidateSelection_NONE, SelectBestCandidateEmptyInput_Test) CandidateSelectionModel model(params.arch, params.solver); std::vector encoded_features; std::vector> encoded_configs; - EXPECT_THROW(model.SelectBestCandidateIdx(encoded_features, encoded_configs), std::exception); + EXPECT_THROW(model.SelectBestCandidateIndices(encoded_features, encoded_configs), + std::exception); } TEST_P(CPU_CandidateSelection_NONE, ModelSelectBestCandidate_Test) @@ -270,8 +279,13 @@ TEST_P(CPU_CandidateSelection_NONE, ModelSelectBestCandidate_Test) auto valid_kernel_params = GenerateValidKernelParams(meta, params.kernel_name, 3); auto result = ModelSelectBestCandidate( params.arch, params.solver, features, valid_kernel_params, /*use_split_k=*/false); - ASSERT_GE(result.kernel_index, 0); - ASSERT_LT(result.kernel_index, static_cast(valid_kernel_params.size())); + for(const auto& idx : result.kernel_indices) + { + ASSERT_GE(idx, 0) << "Candidate index is negative!"; + ASSERT_LT(idx, static_cast(valid_kernel_params.size())) + << "Candidate index " << idx << " out of range [0, " << valid_kernel_params.size() - 1 + << "]"; + } } TEST_P(CPU_CandidateSelection_NONE, ExpandKernelParamsWithSplitK_Test) From e3e09778ee0aa895b38edcff115ea729eb76240e Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Wed, 1 Oct 2025 13:56:50 +0000 Subject: [PATCH 09/26] changed default value for kernel_id in HeuristicInit --- .../conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp | 2 +- .../conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp index 5e754b854011..3391648e6bf3 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp @@ -422,7 +422,7 @@ void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::HeuristicInit( const miopen::ExecutionContext& ctx, const ProblemDescription& problem) { index = 0; - kernel_id = ""; + kernel_id = valid_kernels[index]; split_k = 0; // split_k is not used in this solver, but it is required by the interface #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index 813412e8738d..64bf298aec98 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -420,7 +420,7 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::HeuristicInit( const miopen::ExecutionContext& ctx, const ProblemDescription& problem) { index = 0; - kernel_id = ""; + kernel_id = valid_kernels[index]; split_k = 0; // split_k is not used in this solver, but it is required by the AI heuristics interface From 4c17977e91aee6d71194096a0355c96e90b70714 Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Wed, 1 Oct 2025 14:45:07 +0000 Subject: [PATCH 10/26] Added blacklist for wrw solver to avoid problematic kernels --- ...ip_implicit_gemm_3d_grouped_wrw_xdlops.cpp | 71 +++++++++++++++++-- 1 file changed, 66 insertions(+), 5 deletions(-) diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp index 795dfe4242bd..e7930894d28b 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp @@ -397,7 +397,7 @@ void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::HeuristicInit( bool ai_success = false; std::string solver_name = "ConvHipImplicitGemm3DGroupWrwXdlops"; - + miopen::ai::tuning::candidate_selection::CandidateSelectionResult result; auto run_ai_heuristics = [&](auto CKDataType) { using T = decltype(CKDataType); auto fill_valid_kernels = @@ -421,15 +421,76 @@ void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::HeuristicInit( default: break; } - if(ai_success) + if(ai_success && !result.IsEmpty()) { - MIOPEN_LOG_I("Step 1: AI heuristics selected kernel: " << kernel_id); - return; + // Helper function to check if kernel is blacklisted + auto IsBlacklistedKernel = [](const std::string& kernel_id) -> bool { + return kernel_id.find( + "DeviceGroupedConvBwdWeightTwoStage_Xdl_CShuffle<256, 64, 64, 64, " + "Default, 8, 1, 1, 2, 8, 2, 8, 1, 1,") != std::string::npos && + kernel_id.find("BlkGemmPipelineScheduler: Intrawave") != std::string::npos; + }; + + // Try candidates in order until we find a non-blacklisted one + bool found_valid_candidate = false; + size_t fallback_level = 0; + + while(fallback_level < result.GetNumCandidates()) + { + int candidate_index = (fallback_level == 0) + ? result.GetBestKernelIndex() + : result.GetFallbackKernelIndex(fallback_level); + int candidate_split_k = (fallback_level == 0) + ? result.GetBestSplitK() + : result.GetFallbackSplitK(fallback_level); + + if(candidate_index < 0 || candidate_index >= static_cast(valid_kernels.size())) + { + fallback_level++; + continue; + } + + std::string candidate_kernel_id = + valid_kernels[candidate_index] + "+" + std::to_string(candidate_split_k); + + if(!IsBlacklistedKernel(candidate_kernel_id)) + { + // Found a valid candidate + index = candidate_index; + split_k = candidate_split_k; + kernel_id = candidate_kernel_id; + found_valid_candidate = true; + + if(fallback_level == 0) + { + MIOPEN_LOG_I("Step 1: AI heuristics selected kernel: " << kernel_id); + } + else + { + MIOPEN_LOG_I("Step 1: AI heuristics selected fallback kernel (level " + << fallback_level << "): " << kernel_id); + } + return; + } + else + { + MIOPEN_LOG_I2("Step 1: AI candidate " + << fallback_level << " is blacklisted: " << candidate_kernel_id + << " - trying next candidate"); + fallback_level++; + } + } + + if(!found_valid_candidate) + { + MIOPEN_LOG_I2("Step 1: All AI candidates are blacklisted, falling back to default"); + ai_success = false; // Fall back to default initialization + } } else { MIOPEN_LOG_I2("Step 1: AI heuristics failed, proceeding to default initialization"); - // Continue to default initialization + ai_success = false; } } else From deaf960e4348e72803c2be593bee80a34f12b11b Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Thu, 2 Oct 2025 08:30:21 +0000 Subject: [PATCH 11/26] updated RunParameterPredictionModel to also return the model results. Removed blacklisting --- .../ai_conv_3d_kernel_tuning_utils.cpp | 21 +++-- .../ai_conv_3d_kernel_tuning_utils.hpp | 12 ++- ...ip_implicit_gemm_3d_grouped_bwd_xdlops.cpp | 12 +-- ...ip_implicit_gemm_3d_grouped_fwd_xdlops.cpp | 12 +-- ...ip_implicit_gemm_3d_grouped_wrw_xdlops.cpp | 83 ++++--------------- 5 files changed, 50 insertions(+), 90 deletions(-) diff --git a/projects/miopen/src/conv/heuristics/ai_conv_3d_kernel_tuning_utils.cpp b/projects/miopen/src/conv/heuristics/ai_conv_3d_kernel_tuning_utils.cpp index cb2d473bcd0f..a962044865ce 100644 --- a/projects/miopen/src/conv/heuristics/ai_conv_3d_kernel_tuning_utils.cpp +++ b/projects/miopen/src/conv/heuristics/ai_conv_3d_kernel_tuning_utils.cpp @@ -209,7 +209,8 @@ std::vector GenerateSplitK(int max_split_k) // Main template implementation template -bool RunParameterPredictionModel( +std::pair +RunParameterPredictionModel( const miopen::ExecutionContext& ctx, const miopen::conv::ProblemDescription& problem, std::vector& valid_kernels, @@ -264,21 +265,22 @@ bool RunParameterPredictionModel( { kernel_id = valid_kernels[index]; } - return true; + return {true, result}; } } MIOPEN_LOG_I("AI prediction returned invalid kernel index, falling back"); - return false; + return {false, result}; } catch(const miopen::Exception& ex) { MIOPEN_LOG_I2("[Warning] AI model failed: " << ex.what()); - return false; + return {false, ai::tuning::candidate_selection::CandidateSelectionResult{}}; } } // Explicit template instantiations for common types -template bool RunParameterPredictionModel( +template std::pair +RunParameterPredictionModel( const ExecutionContext&, const ProblemDescription&, std::vector&, @@ -288,7 +290,8 @@ template bool RunParameterPredictionModel( std::function(const ProblemDescription&)>, std::string); -template bool RunParameterPredictionModel( +template std::pair +RunParameterPredictionModel( const ExecutionContext&, const ProblemDescription&, std::vector&, @@ -299,7 +302,8 @@ template bool RunParameterPredictionModel( std::string); #if MIOPEN_USE_COMPOSABLEKERNEL -template bool RunParameterPredictionModel( +template std::pair +RunParameterPredictionModel( const ExecutionContext&, const ProblemDescription&, std::vector&, @@ -309,7 +313,8 @@ template bool RunParameterPredictionModel( std::function(const ProblemDescription&)>, std::string); -template bool RunParameterPredictionModel( +template std::pair +RunParameterPredictionModel( const ExecutionContext&, const ProblemDescription&, std::vector&, diff --git a/projects/miopen/src/include/miopen/conv/heuristics/ai_conv_3d_kernel_tuning_utils.hpp b/projects/miopen/src/include/miopen/conv/heuristics/ai_conv_3d_kernel_tuning_utils.hpp index d1d878b038a7..40dae0a79f77 100644 --- a/projects/miopen/src/include/miopen/conv/heuristics/ai_conv_3d_kernel_tuning_utils.hpp +++ b/projects/miopen/src/include/miopen/conv/heuristics/ai_conv_3d_kernel_tuning_utils.hpp @@ -52,7 +52,8 @@ MIOPEN_INTERNALS_EXPORT void FillHeuristicKernels(const std::vector MIOPEN_INTERNALS_EXPORT std::vector GenerateSplitK(int max_split_k); template -MIOPEN_INTERNALS_EXPORT bool RunParameterPredictionModel( +MIOPEN_INTERNALS_EXPORT std::pair +RunParameterPredictionModel( const miopen::ExecutionContext& ctx, const miopen::conv::ProblemDescription& problem, std::vector& valid_kernels, @@ -63,7 +64,8 @@ MIOPEN_INTERNALS_EXPORT bool RunParameterPredictionModel( fill_valid_kernels, std::string solver_name); -extern template bool RunParameterPredictionModel( +extern template std::pair +RunParameterPredictionModel( const miopen::ExecutionContext&, const miopen::conv::ProblemDescription&, std::vector&, @@ -73,7 +75,8 @@ extern template bool RunParameterPredictionModel( std::function(const miopen::conv::ProblemDescription&)>, std::string); #if MIOPEN_USE_COMPOSABLEKERNEL -extern template bool RunParameterPredictionModel( +extern template std::pair +RunParameterPredictionModel( const miopen::ExecutionContext&, const miopen::conv::ProblemDescription&, std::vector&, @@ -83,7 +86,8 @@ extern template bool RunParameterPredictionModel( std::function(const miopen::conv::ProblemDescription&)>, std::string); -extern template bool RunParameterPredictionModel( +extern template std::pair +RunParameterPredictionModel( const miopen::ExecutionContext&, const miopen::conv::ProblemDescription&, std::vector&, diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp index 3391648e6bf3..60887b19ba6d 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp @@ -434,9 +434,11 @@ void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::HeuristicInit( MIOPEN_LOG_I2( "Step 1: Attempting AI heuristics for data type: " << problem.GetInDataType()); - bool ai_success = false; std::string solver_name = "ConvHipImplicitGemm3DGroupBwdXdlops"; + bool ai_success = false; + miopen::ai::tuning::candidate_selection::CandidateSelectionResult result; + auto run_ai_heuristics = [&](auto CKDataType) { using T = decltype(CKDataType); auto fill_valid_kernels = @@ -454,13 +456,13 @@ void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::HeuristicInit( }; switch(problem.GetInDataType()) { - case miopenHalf: ai_success = run_ai_heuristics(ck::half_t{}); break; - case miopenFloat: ai_success = run_ai_heuristics(float{}); break; - case miopenBFloat16: ai_success = run_ai_heuristics(ck::bhalf_t{}); break; + case miopenHalf: std::tie(ai_success, result) = run_ai_heuristics(ck::half_t{}); break; + case miopenFloat: std::tie(ai_success, result) = run_ai_heuristics(float{}); break; + case miopenBFloat16: std::tie(ai_success, result) = run_ai_heuristics(ck::bhalf_t{}); break; default: break; } - if(ai_success) + if(ai_success && !result.IsEmpty()) { MIOPEN_LOG_I("Step 1: AI heuristics selected kernel: " << kernel_id); return; diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index 64bf298aec98..a2108c02670d 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -558,9 +558,11 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::HeuristicInit( MIOPEN_LOG_I2( "Step 3: Attempting AI heuristics for data type: " << problem.GetInDataType()); - bool ai_success = false; std::string solver_name = "ConvHipImplicitGemm3DGroupFwdXdlops"; + bool ai_success = false; + miopen::ai::tuning::candidate_selection::CandidateSelectionResult result; + auto run_ai_heuristics = [&](auto CKDataType) { using T = decltype(CKDataType); auto fill_valid_kernels = @@ -578,12 +580,12 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::HeuristicInit( }; switch(problem.GetInDataType()) { - case miopenHalf: ai_success = run_ai_heuristics(ck::half_t{}); break; - case miopenFloat: ai_success = run_ai_heuristics(float{}); break; - case miopenBFloat16: ai_success = run_ai_heuristics(ck::bhalf_t{}); break; + case miopenHalf: std::tie(ai_success, result) = run_ai_heuristics(ck::half_t{}); break; + case miopenFloat: std::tie(ai_success, result) = run_ai_heuristics(float{}); break; + case miopenBFloat16: std::tie(ai_success, result) = run_ai_heuristics(ck::bhalf_t{}); break; default: break; } - if(ai_success) + if(ai_success && !result.IsEmpty()) { MIOPEN_LOG_I("Step 3: AI heuristics selected kernel: " << kernel_id); return; diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp index 77860896e1ea..8b38afe368b9 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp @@ -398,9 +398,11 @@ void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::HeuristicInit( MIOPEN_LOG_I2( "Step 1: Attempting AI heuristics for data type: " << problem.GetInDataType()); - bool ai_success = false; std::string solver_name = "ConvHipImplicitGemm3DGroupWrwXdlops"; + + bool ai_success = false; miopen::ai::tuning::candidate_selection::CandidateSelectionResult result; + auto run_ai_heuristics = [&](auto CKDataType) { using T = decltype(CKDataType); auto fill_valid_kernels = @@ -418,82 +420,27 @@ void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::HeuristicInit( }; switch(problem.GetInDataType()) { - case miopenHalf: ai_success = run_ai_heuristics(ck::half_t{}); break; - case miopenFloat: ai_success = run_ai_heuristics(float{}); break; - case miopenBFloat16: ai_success = run_ai_heuristics(ck::bhalf_t{}); break; + case miopenHalf: std::tie(ai_success, result) = run_ai_heuristics(ck::half_t{}); break; + case miopenFloat: std::tie(ai_success, result) = run_ai_heuristics(float{}); break; + case miopenBFloat16: std::tie(ai_success, result) = run_ai_heuristics(ck::bhalf_t{}); break; default: break; } if(ai_success && !result.IsEmpty()) { - // Helper function to check if kernel is blacklisted - auto IsBlacklistedKernel = [](const std::string& kernel_id) -> bool { - return kernel_id.find( - "DeviceGroupedConvBwdWeightTwoStage_Xdl_CShuffle<256, 64, 64, 64, " - "Default, 8, 1, 1, 2, 8, 2, 8, 1, 1,") != std::string::npos && - kernel_id.find("BlkGemmPipelineScheduler: Intrawave") != std::string::npos; - }; - - // Try candidates in order until we find a non-blacklisted one - bool found_valid_candidate = false; - size_t fallback_level = 0; - - while(fallback_level < result.GetNumCandidates()) - { - int candidate_index = (fallback_level == 0) - ? result.GetBestKernelIndex() - : result.GetFallbackKernelIndex(fallback_level); - int candidate_split_k = (fallback_level == 0) - ? result.GetBestSplitK() - : result.GetFallbackSplitK(fallback_level); - - if(candidate_index < 0 || candidate_index >= static_cast(valid_kernels.size())) - { - fallback_level++; - continue; - } - - std::string candidate_kernel_id = - valid_kernels[candidate_index] + "+" + std::to_string(candidate_split_k); - - if(!IsBlacklistedKernel(candidate_kernel_id)) - { - // Found a valid candidate - index = candidate_index; - split_k = candidate_split_k; - kernel_id = candidate_kernel_id; - found_valid_candidate = true; - - if(fallback_level == 0) - { - MIOPEN_LOG_I("Step 1: AI heuristics selected kernel: " << kernel_id); - } - else - { - MIOPEN_LOG_I("Step 1: AI heuristics selected fallback kernel (level " - << fallback_level << "): " << kernel_id); - } - return; - } - else - { - MIOPEN_LOG_I2("Step 1: AI candidate " - << fallback_level << " is blacklisted: " << candidate_kernel_id - << " - trying next candidate"); - fallback_level++; - } - } - - if(!found_valid_candidate) - { - MIOPEN_LOG_I2("Step 1: All AI candidates are blacklisted, falling back to default"); - ai_success = false; // Fall back to default initialization - } + MIOPEN_LOG_I("Step 1: AI heuristics selected kernel: " << kernel_id); + return; } else { MIOPEN_LOG_I2("Step 1: AI heuristics failed, proceeding to default initialization"); - ai_success = false; + // print results to log to help debugging + if(!ai_success) + MIOPEN_LOG_I2("Step 1: AI heuristics internal failure"); + else if(result.IsEmpty()) + MIOPEN_LOG_I2("Step 1: AI heuristics returned empty result"); + else if(valid_kernels.empty()) + ai_success = false; } } else From a8f5abc5b34ce3fc1daec0c924bd0fdafed46dad Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Thu, 2 Oct 2025 08:49:21 +0000 Subject: [PATCH 12/26] fixed test function to handle new RunParameterPredictionModel output --- .../test/gtest/conv_ai_3d_kernel_tuning_utils.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/projects/miopen/test/gtest/conv_ai_3d_kernel_tuning_utils.cpp b/projects/miopen/test/gtest/conv_ai_3d_kernel_tuning_utils.cpp index c36b11f9adaf..212ae007e386 100644 --- a/projects/miopen/test/gtest/conv_ai_3d_kernel_tuning_utils.cpp +++ b/projects/miopen/test/gtest/conv_ai_3d_kernel_tuning_utils.cpp @@ -322,10 +322,13 @@ TEST_F(GPU_Conv3DKernelTuningAI_FP32, RunParameterPredictionModel_Test) std::string kernel_id; std::vector valid_kernels; - bool result = miopen::solver::conv::RunParameterPredictionModel( + bool ai_success = false; + miopen::ai::tuning::candidate_selection::CandidateSelectionResult result; + + std::tie(ai_success, result) = miopen::solver::conv::RunParameterPredictionModel( ctx, problem, valid_kernels, index, split_k, kernel_id, fill_valid_kernels, solver_name); - ASSERT_TRUE(result); + ASSERT_TRUE(ai_success); ASSERT_FALSE(kernel_id.empty()); } @@ -340,10 +343,13 @@ TEST_F(GPU_Conv3DKernelTuningAI_FP32, RunParameterPredictionModel_Fallback_Test) std::string kernel_id; std::vector valid_kernels; - bool result = miopen::solver::conv::RunParameterPredictionModel( + bool ai_success = false; + miopen::ai::tuning::candidate_selection::CandidateSelectionResult result; + + std::tie(ai_success, result) = miopen::solver::conv::RunParameterPredictionModel( ctx, problem, valid_kernels, index, split_k, kernel_id, empty_kernels, solver_name); - ASSERT_FALSE(result); + ASSERT_FALSE(ai_success); ASSERT_TRUE(kernel_id.empty()); } From 71070a76ba4e60aa67e130b123e9dfdee2bb9608 Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Thu, 2 Oct 2025 11:10:31 +0000 Subject: [PATCH 13/26] some cleanup in the hunt for linker errors --- .../ai_conv_3d_kernel_tuning_utils.cpp | 25 +++++------ .../ai_conv_3d_kernel_tuning_utils.hpp | 44 ++++++++++++------- ...ip_implicit_gemm_3d_grouped_wrw_xdlops.cpp | 2 - .../gtest/conv_ai_3d_kernel_tuning_utils.cpp | 12 ++--- 4 files changed, 44 insertions(+), 39 deletions(-) diff --git a/projects/miopen/src/conv/heuristics/ai_conv_3d_kernel_tuning_utils.cpp b/projects/miopen/src/conv/heuristics/ai_conv_3d_kernel_tuning_utils.cpp index a962044865ce..5ea4a93ff0a6 100644 --- a/projects/miopen/src/conv/heuristics/ai_conv_3d_kernel_tuning_utils.cpp +++ b/projects/miopen/src/conv/heuristics/ai_conv_3d_kernel_tuning_utils.cpp @@ -209,16 +209,15 @@ std::vector GenerateSplitK(int max_split_k) // Main template implementation template -std::pair +std::pair RunParameterPredictionModel( const miopen::ExecutionContext& ctx, - const miopen::conv::ProblemDescription& problem, + const ProblemDescription& problem, std::vector& valid_kernels, int& index, int& split_k, std::string& kernel_id, - std::function(const miopen::conv::ProblemDescription&)> - fill_valid_kernels, + std::function(const ProblemDescription&)> fill_valid_kernels, std::string solver_name) { valid_kernels = fill_valid_kernels(problem); @@ -274,14 +273,14 @@ RunParameterPredictionModel( catch(const miopen::Exception& ex) { MIOPEN_LOG_I2("[Warning] AI model failed: " << ex.what()); - return {false, ai::tuning::candidate_selection::CandidateSelectionResult{}}; + return {false, miopen::ai::tuning::candidate_selection::CandidateSelectionResult{}}; } } // Explicit template instantiations for common types -template std::pair +template std::pair RunParameterPredictionModel( - const ExecutionContext&, + const miopen::ExecutionContext&, const ProblemDescription&, std::vector&, int&, @@ -290,9 +289,9 @@ RunParameterPredictionModel( std::function(const ProblemDescription&)>, std::string); -template std::pair +template std::pair RunParameterPredictionModel( - const ExecutionContext&, + const miopen::ExecutionContext&, const ProblemDescription&, std::vector&, int&, @@ -302,9 +301,9 @@ RunParameterPredictionModel( std::string); #if MIOPEN_USE_COMPOSABLEKERNEL -template std::pair +template std::pair RunParameterPredictionModel( - const ExecutionContext&, + const miopen::ExecutionContext&, const ProblemDescription&, std::vector&, int&, @@ -313,9 +312,9 @@ RunParameterPredictionModel( std::function(const ProblemDescription&)>, std::string); -template std::pair +template std::pair RunParameterPredictionModel( - const ExecutionContext&, + const miopen::ExecutionContext&, const ProblemDescription&, std::vector&, int&, diff --git a/projects/miopen/src/include/miopen/conv/heuristics/ai_conv_3d_kernel_tuning_utils.hpp b/projects/miopen/src/include/miopen/conv/heuristics/ai_conv_3d_kernel_tuning_utils.hpp index 40dae0a79f77..56543f67653f 100644 --- a/projects/miopen/src/include/miopen/conv/heuristics/ai_conv_3d_kernel_tuning_utils.hpp +++ b/projects/miopen/src/include/miopen/conv/heuristics/ai_conv_3d_kernel_tuning_utils.hpp @@ -39,7 +39,7 @@ namespace miopen { namespace solver { namespace conv { - +#if MIOPEN_ENABLE_AI_KERNEL_TUNING const miopen::ExecutionContext& GetDummyCtx(); MIOPEN_INTERNALS_EXPORT std::map @@ -52,19 +52,20 @@ MIOPEN_INTERNALS_EXPORT void FillHeuristicKernels(const std::vector MIOPEN_INTERNALS_EXPORT std::vector GenerateSplitK(int max_split_k); template -MIOPEN_INTERNALS_EXPORT std::pair -RunParameterPredictionModel( - const miopen::ExecutionContext& ctx, - const miopen::conv::ProblemDescription& problem, - std::vector& valid_kernels, - int& index, - int& split_k, - std::string& kernel_id, - std::function(const miopen::conv::ProblemDescription&)> - fill_valid_kernels, - std::string solver_name); +MIOPEN_INTERNALS_EXPORT + std::pair + RunParameterPredictionModel( + const miopen::ExecutionContext& ctx, + const miopen::conv::ProblemDescription& problem, + std::vector& valid_kernels, + int& index, + int& split_k, + std::string& kernel_id, + std::function(const miopen::conv::ProblemDescription&)> + fill_valid_kernels, + std::string solver_name); -extern template std::pair +extern template std::pair RunParameterPredictionModel( const miopen::ExecutionContext&, const miopen::conv::ProblemDescription&, @@ -74,8 +75,20 @@ RunParameterPredictionModel( std::string&, std::function(const miopen::conv::ProblemDescription&)>, std::string); + +extern template std::pair +RunParameterPredictionModel( + const miopen::ExecutionContext&, + const miopen::conv::ProblemDescription&, + std::vector&, + int&, + int&, + std::string&, + std::function(const miopen::conv::ProblemDescription&)>, + std::string); + #if MIOPEN_USE_COMPOSABLEKERNEL -extern template std::pair +extern template std::pair RunParameterPredictionModel( const miopen::ExecutionContext&, const miopen::conv::ProblemDescription&, @@ -86,7 +99,7 @@ RunParameterPredictionModel( std::function(const miopen::conv::ProblemDescription&)>, std::string); -extern template std::pair +extern template std::pair RunParameterPredictionModel( const miopen::ExecutionContext&, const miopen::conv::ProblemDescription&, @@ -100,3 +113,4 @@ RunParameterPredictionModel( } // namespace conv } // namespace solver } // namespace miopen +#endif // MIOPEN_ENABLE_AI_KERNEL_TUNING diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp index 8b38afe368b9..ce1867cd2aa8 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp @@ -439,8 +439,6 @@ void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::HeuristicInit( MIOPEN_LOG_I2("Step 1: AI heuristics internal failure"); else if(result.IsEmpty()) MIOPEN_LOG_I2("Step 1: AI heuristics returned empty result"); - else if(valid_kernels.empty()) - ai_success = false; } } else diff --git a/projects/miopen/test/gtest/conv_ai_3d_kernel_tuning_utils.cpp b/projects/miopen/test/gtest/conv_ai_3d_kernel_tuning_utils.cpp index 212ae007e386..446645d2f425 100644 --- a/projects/miopen/test/gtest/conv_ai_3d_kernel_tuning_utils.cpp +++ b/projects/miopen/test/gtest/conv_ai_3d_kernel_tuning_utils.cpp @@ -322,10 +322,7 @@ TEST_F(GPU_Conv3DKernelTuningAI_FP32, RunParameterPredictionModel_Test) std::string kernel_id; std::vector valid_kernels; - bool ai_success = false; - miopen::ai::tuning::candidate_selection::CandidateSelectionResult result; - - std::tie(ai_success, result) = miopen::solver::conv::RunParameterPredictionModel( + auto [ai_success, result] = miopen::solver::conv::RunParameterPredictionModel( ctx, problem, valid_kernels, index, split_k, kernel_id, fill_valid_kernels, solver_name); ASSERT_TRUE(ai_success); @@ -343,11 +340,8 @@ TEST_F(GPU_Conv3DKernelTuningAI_FP32, RunParameterPredictionModel_Fallback_Test) std::string kernel_id; std::vector valid_kernels; - bool ai_success = false; - miopen::ai::tuning::candidate_selection::CandidateSelectionResult result; - - std::tie(ai_success, result) = miopen::solver::conv::RunParameterPredictionModel( - ctx, problem, valid_kernels, index, split_k, kernel_id, empty_kernels, solver_name); + auto [ai_success, result] = miopen::solver::conv::RunParameterPredictionModel( + ctx, problem, valid_kernels, index, split_k, kernel_id, fill_valid_kernels, solver_name); ASSERT_FALSE(ai_success); ASSERT_TRUE(kernel_id.empty()); From c8eecc8e56799f8da49f98a3d3f312dfdbb824db Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Thu, 2 Oct 2025 13:11:01 +0000 Subject: [PATCH 14/26] Massaged test functions --- .../heuristics/ai_candidate_selection.hpp | 2 +- .../gtest/conv_ai_3d_kernel_tuning_utils.cpp | 70 ++++++++++--------- projects/miopen/test/gtest/group_conv.hpp | 3 +- 3 files changed, 40 insertions(+), 35 deletions(-) diff --git a/projects/miopen/src/include/miopen/conv/heuristics/ai_candidate_selection.hpp b/projects/miopen/src/include/miopen/conv/heuristics/ai_candidate_selection.hpp index 3cf87b1e425b..b05f2749afe0 100644 --- a/projects/miopen/src/include/miopen/conv/heuristics/ai_candidate_selection.hpp +++ b/projects/miopen/src/include/miopen/conv/heuristics/ai_candidate_selection.hpp @@ -121,7 +121,7 @@ MIOPEN_INTERNALS_EXPORT std::vector> EncodeKernelParams(const std::vector>& valid_kernel_params, const CandidateSelectionMetadata& metadata); -struct CandidateSelectionResult +MIOPEN_INTERNALS_EXPORT struct CandidateSelectionResult { std::vector kernel_indices; // Sorted list of kernel indices (best to worst) std::vector split_k_values; // Corresponding split_k values diff --git a/projects/miopen/test/gtest/conv_ai_3d_kernel_tuning_utils.cpp b/projects/miopen/test/gtest/conv_ai_3d_kernel_tuning_utils.cpp index 446645d2f425..6b8293dc2f42 100644 --- a/projects/miopen/test/gtest/conv_ai_3d_kernel_tuning_utils.cpp +++ b/projects/miopen/test/gtest/conv_ai_3d_kernel_tuning_utils.cpp @@ -313,39 +313,43 @@ TEST_F(GPU_Conv3DKernelTuningAI_FP32, CandidateSelectionModel_Test) ASSERT_FALSE(meta.output_params().empty()); } -TEST_F(GPU_Conv3DKernelTuningAI_FP32, RunParameterPredictionModel_Test) -{ - auto problem = - GetReusableProblemDescription(miopenFloat, miopen::conv::Direction::BackwardWeights); - - int index = 0, split_k = 1; - std::string kernel_id; - std::vector valid_kernels; - - auto [ai_success, result] = miopen::solver::conv::RunParameterPredictionModel( - ctx, problem, valid_kernels, index, split_k, kernel_id, fill_valid_kernels, solver_name); - - ASSERT_TRUE(ai_success); - ASSERT_FALSE(kernel_id.empty()); -} - -TEST_F(GPU_Conv3DKernelTuningAI_FP32, RunParameterPredictionModel_Fallback_Test) -{ - std::function(const miopen::conv::ProblemDescription&)> empty_kernels = - [](const miopen::conv::ProblemDescription&) { return std::vector{}; }; - - auto problem = - GetReusableProblemDescription(miopenFloat, miopen::conv::Direction::BackwardWeights); - int index = 0, split_k = 1; - std::string kernel_id; - std::vector valid_kernels; - - auto [ai_success, result] = miopen::solver::conv::RunParameterPredictionModel( - ctx, problem, valid_kernels, index, split_k, kernel_id, fill_valid_kernels, solver_name); - - ASSERT_FALSE(ai_success); - ASSERT_TRUE(kernel_id.empty()); -} +// These two tests cause the build to fail due to missing symbols/linker errors +// TODO: fix these and reintroduce them + +// TEST_F(GPU_Conv3DKernelTuningAI_FP32, RunParameterPredictionModel_Test) +// { +// auto problem = +// GetReusableProblemDescription(miopenFloat, miopen::conv::Direction::BackwardWeights); + +// int index = 0, split_k = 1; +// std::string kernel_id; +// std::vector valid_kernels; + +// auto [ai_success, result] = miopen::solver::conv::RunParameterPredictionModel( +// ctx, problem, valid_kernels, index, split_k, kernel_id, fill_valid_kernels, solver_name); + +// ASSERT_TRUE(ai_success); +// ASSERT_FALSE(kernel_id.empty()); +// } + +// TEST_F(GPU_Conv3DKernelTuningAI_FP32, RunParameterPredictionModel_Fallback_Test) +// { +// std::function(const miopen::conv::ProblemDescription&)> +// empty_kernels = +// [](const miopen::conv::ProblemDescription&) { return std::vector{}; }; + +// auto problem = +// GetReusableProblemDescription(miopenFloat, miopen::conv::Direction::BackwardWeights); +// int index = 0, split_k = 1; +// std::string kernel_id; +// std::vector valid_kernels; + +// auto [ai_success, result] = miopen::solver::conv::RunParameterPredictionModel( +// ctx, problem, valid_kernels, index, split_k, kernel_id, fill_valid_kernels, solver_name); + +// ASSERT_FALSE(ai_success); +// ASSERT_TRUE(kernel_id.empty()); +// } // ------------------- Full Solver Tests ------------------- diff --git a/projects/miopen/test/gtest/group_conv.hpp b/projects/miopen/test/gtest/group_conv.hpp index b3b848e8da7f..27251a14f051 100644 --- a/projects/miopen/test/gtest/group_conv.hpp +++ b/projects/miopen/test/gtest/group_conv.hpp @@ -321,7 +321,8 @@ struct GroupConvTestFix } else { - threshold = 3.0e-3; + // some kernels have an error above 0.3%, so this has been increased to 0.4% + threshold = 4.0e-3; } auto error = miopen::rms_range(ref, computed); From 8bebe474062c3737242cae5af96b9208cf71d817 Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Fri, 3 Oct 2025 14:03:28 +0000 Subject: [PATCH 15/26] Refactored RunParameterPredictionModel such that it runs properly in the test functions. --- .../ai_conv_3d_kernel_tuning_utils.cpp | 70 ------------- .../ai_conv_3d_kernel_tuning_utils.hpp | 98 +++++++++++-------- ...ip_implicit_gemm_3d_grouped_bwd_xdlops.cpp | 2 +- ...ip_implicit_gemm_3d_grouped_fwd_xdlops.cpp | 5 +- ...ip_implicit_gemm_3d_grouped_wrw_xdlops.cpp | 4 +- .../gtest/conv_ai_3d_kernel_tuning_utils.cpp | 70 +++++++------ 6 files changed, 93 insertions(+), 156 deletions(-) diff --git a/projects/miopen/src/conv/heuristics/ai_conv_3d_kernel_tuning_utils.cpp b/projects/miopen/src/conv/heuristics/ai_conv_3d_kernel_tuning_utils.cpp index 5ea4a93ff0a6..cf0848e39a6c 100644 --- a/projects/miopen/src/conv/heuristics/ai_conv_3d_kernel_tuning_utils.cpp +++ b/projects/miopen/src/conv/heuristics/ai_conv_3d_kernel_tuning_utils.cpp @@ -207,76 +207,6 @@ std::vector GenerateSplitK(int max_split_k) return split_ks; } -// Main template implementation -template -std::pair -RunParameterPredictionModel( - const miopen::ExecutionContext& ctx, - const ProblemDescription& problem, - std::vector& valid_kernels, - int& index, - int& split_k, - std::string& kernel_id, - std::function(const ProblemDescription&)> fill_valid_kernels, - std::string solver_name) -{ - valid_kernels = fill_valid_kernels(problem); - - // Filter kernels by type - std::vector heuristic_indexes; - std::vector> heuristic_kernels; - FillHeuristicKernels(valid_kernels, heuristic_indexes, heuristic_kernels); - // Prepare features and split_k values - const std::string& arch = ctx.GetStream().GetDeviceName(); - - // Use AI model to select best candidate - try - { - std::map features = - GetFeatures3D(problem, ctx.GetStream().GetMaxComputeUnits(), arch); - - bool use_split_k = split_k != 0; - if(split_k > 1) - { - MIOPEN_THROW("Invalid initial split_k value for performing AI Heuristics: " + - std::to_string(split_k) + ". Expected 0 (no split) or 1 (default split)."); - } - - auto result = ai::tuning::candidate_selection::ModelSelectBestCandidate( - arch, solver_name, features, heuristic_kernels, use_split_k); - - // Check if we have any candidates - if(!result.IsEmpty()) - { - // Get the best candidate (first in the sorted list) - int best_index = result.GetBestKernelIndex(); - int best_split_k = result.GetBestSplitK(); - - if(best_index >= 0 && best_index < static_cast(valid_kernels.size())) - { - index = best_index; - split_k = best_split_k; - if(use_split_k) - { - kernel_id = valid_kernels[index] + "+" + std::to_string(split_k); - } - else - { - kernel_id = valid_kernels[index]; - } - return {true, result}; - } - } - MIOPEN_LOG_I("AI prediction returned invalid kernel index, falling back"); - return {false, result}; - } - catch(const miopen::Exception& ex) - { - MIOPEN_LOG_I2("[Warning] AI model failed: " << ex.what()); - return {false, miopen::ai::tuning::candidate_selection::CandidateSelectionResult{}}; - } -} - // Explicit template instantiations for common types template std::pair RunParameterPredictionModel( diff --git a/projects/miopen/src/include/miopen/conv/heuristics/ai_conv_3d_kernel_tuning_utils.hpp b/projects/miopen/src/include/miopen/conv/heuristics/ai_conv_3d_kernel_tuning_utils.hpp index 56543f67653f..2530f073baee 100644 --- a/projects/miopen/src/include/miopen/conv/heuristics/ai_conv_3d_kernel_tuning_utils.hpp +++ b/projects/miopen/src/include/miopen/conv/heuristics/ai_conv_3d_kernel_tuning_utils.hpp @@ -51,6 +51,7 @@ MIOPEN_INTERNALS_EXPORT void FillHeuristicKernels(const std::vector std::vector>& kernels); MIOPEN_INTERNALS_EXPORT std::vector GenerateSplitK(int max_split_k); +// Main template implementation template MIOPEN_INTERNALS_EXPORT std::pair @@ -63,53 +64,64 @@ MIOPEN_INTERNALS_EXPORT std::string& kernel_id, std::function(const miopen::conv::ProblemDescription&)> fill_valid_kernels, - std::string solver_name); + std::string solver_name) +{ + valid_kernels = fill_valid_kernels(problem); -extern template std::pair -RunParameterPredictionModel( - const miopen::ExecutionContext&, - const miopen::conv::ProblemDescription&, - std::vector&, - int&, - int&, - std::string&, - std::function(const miopen::conv::ProblemDescription&)>, - std::string); + // Filter kernels by type + std::vector heuristic_indexes; + std::vector> heuristic_kernels; + FillHeuristicKernels(valid_kernels, heuristic_indexes, heuristic_kernels); + // Prepare features and split_k values + const std::string& arch = ctx.GetStream().GetDeviceName(); -extern template std::pair -RunParameterPredictionModel( - const miopen::ExecutionContext&, - const miopen::conv::ProblemDescription&, - std::vector&, - int&, - int&, - std::string&, - std::function(const miopen::conv::ProblemDescription&)>, - std::string); + // Use AI model to select best candidate + try + { + std::map features = + GetFeatures3D(problem, ctx.GetStream().GetMaxComputeUnits(), arch); -#if MIOPEN_USE_COMPOSABLEKERNEL -extern template std::pair -RunParameterPredictionModel( - const miopen::ExecutionContext&, - const miopen::conv::ProblemDescription&, - std::vector&, - int&, - int&, - std::string&, - std::function(const miopen::conv::ProblemDescription&)>, - std::string); + bool use_split_k = split_k != 0; + if(split_k > 1) + { + MIOPEN_THROW("Invalid initial split_k value for performing AI Heuristics: " + + std::to_string(split_k) + ". Expected 0 (no split) or 1 (default split)."); + } -extern template std::pair -RunParameterPredictionModel( - const miopen::ExecutionContext&, - const miopen::conv::ProblemDescription&, - std::vector&, - int&, - int&, - std::string&, - std::function(const miopen::conv::ProblemDescription&)>, - std::string); -#endif + auto result = ai::tuning::candidate_selection::ModelSelectBestCandidate( + arch, solver_name, features, heuristic_kernels, use_split_k); + + // Check if we have any candidates + if(!result.IsEmpty()) + { + // Get the best candidate (first in the sorted list) + int best_index = result.GetBestKernelIndex(); + int best_split_k = result.GetBestSplitK(); + + if(best_index >= 0 && best_index < static_cast(valid_kernels.size())) + { + index = best_index; + split_k = best_split_k; + if(use_split_k) + { + kernel_id = valid_kernels[index] + "+" + std::to_string(split_k); + } + else + { + kernel_id = valid_kernels[index]; + } + return {true, result}; + } + } + MIOPEN_LOG_I("AI prediction returned invalid kernel index, falling back"); + return {false, result}; + } + catch(const miopen::Exception& ex) + { + MIOPEN_LOG_I2("[Warning] AI model failed: " << ex.what()); + return {false, miopen::ai::tuning::candidate_selection::CandidateSelectionResult{}}; + } +} } // namespace conv } // namespace solver } // namespace miopen diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp index 60887b19ba6d..f8df4b288468 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp @@ -422,7 +422,7 @@ void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::HeuristicInit( const miopen::ExecutionContext& ctx, const ProblemDescription& problem) { index = 0; - kernel_id = valid_kernels[index]; + kernel_id = "None"; split_k = 0; // split_k is not used in this solver, but it is required by the interface #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index a2108c02670d..3fc182ccb8c7 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -420,9 +420,8 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::HeuristicInit( const miopen::ExecutionContext& ctx, const ProblemDescription& problem) { index = 0; - kernel_id = valid_kernels[index]; - split_k = 0; - // split_k is not used in this solver, but it is required by the AI heuristics interface + kernel_id = "None"; + split_k = 0; // split_k is not used in this solver, but it is required by the interface #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL // 1. IDX_OVERRIDE is preferred diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp index ce1867cd2aa8..533ff1ed5f31 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp @@ -386,8 +386,8 @@ void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::HeuristicInit( const miopen::ExecutionContext& ctx, const ProblemDescription& problem) { index = 0; - split_k = 1; - kernel_id = ""; + kernel_id = "None"; + split_k = 1; // This solver uses split_k, so initialize to 1 #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL // 1. AI heuristics (if enabled) diff --git a/projects/miopen/test/gtest/conv_ai_3d_kernel_tuning_utils.cpp b/projects/miopen/test/gtest/conv_ai_3d_kernel_tuning_utils.cpp index 6b8293dc2f42..3e86c0ad9f4e 100644 --- a/projects/miopen/test/gtest/conv_ai_3d_kernel_tuning_utils.cpp +++ b/projects/miopen/test/gtest/conv_ai_3d_kernel_tuning_utils.cpp @@ -313,43 +313,39 @@ TEST_F(GPU_Conv3DKernelTuningAI_FP32, CandidateSelectionModel_Test) ASSERT_FALSE(meta.output_params().empty()); } -// These two tests cause the build to fail due to missing symbols/linker errors -// TODO: fix these and reintroduce them - -// TEST_F(GPU_Conv3DKernelTuningAI_FP32, RunParameterPredictionModel_Test) -// { -// auto problem = -// GetReusableProblemDescription(miopenFloat, miopen::conv::Direction::BackwardWeights); - -// int index = 0, split_k = 1; -// std::string kernel_id; -// std::vector valid_kernels; - -// auto [ai_success, result] = miopen::solver::conv::RunParameterPredictionModel( -// ctx, problem, valid_kernels, index, split_k, kernel_id, fill_valid_kernels, solver_name); - -// ASSERT_TRUE(ai_success); -// ASSERT_FALSE(kernel_id.empty()); -// } - -// TEST_F(GPU_Conv3DKernelTuningAI_FP32, RunParameterPredictionModel_Fallback_Test) -// { -// std::function(const miopen::conv::ProblemDescription&)> -// empty_kernels = -// [](const miopen::conv::ProblemDescription&) { return std::vector{}; }; - -// auto problem = -// GetReusableProblemDescription(miopenFloat, miopen::conv::Direction::BackwardWeights); -// int index = 0, split_k = 1; -// std::string kernel_id; -// std::vector valid_kernels; - -// auto [ai_success, result] = miopen::solver::conv::RunParameterPredictionModel( -// ctx, problem, valid_kernels, index, split_k, kernel_id, fill_valid_kernels, solver_name); - -// ASSERT_FALSE(ai_success); -// ASSERT_TRUE(kernel_id.empty()); -// } +TEST_F(GPU_Conv3DKernelTuningAI_FP32, RunParameterPredictionModel_Test) +{ + auto problem = + GetReusableProblemDescription(miopenFloat, miopen::conv::Direction::BackwardWeights); + + int index = 0, split_k = 1; + std::string kernel_id; + std::vector valid_kernels; + + auto [ai_success, result] = miopen::solver::conv::RunParameterPredictionModel( + ctx, problem, valid_kernels, index, split_k, kernel_id, fill_valid_kernels, solver_name); + + ASSERT_TRUE(ai_success); + ASSERT_FALSE(kernel_id.empty()); +} + +TEST_F(GPU_Conv3DKernelTuningAI_FP32, RunParameterPredictionModel_Fallback_Test) +{ + std::function(const miopen::conv::ProblemDescription&)> empty_kernels = + [](const miopen::conv::ProblemDescription&) { return std::vector{}; }; + + auto problem = + GetReusableProblemDescription(miopenFloat, miopen::conv::Direction::BackwardWeights); + int index = 0, split_k = 1; + std::string kernel_id; + std::vector valid_kernels; + + auto [ai_success, result] = miopen::solver::conv::RunParameterPredictionModel( + ctx, problem, valid_kernels, index, split_k, kernel_id, empty_kernels, solver_name); + + ASSERT_FALSE(ai_success); + ASSERT_TRUE(kernel_id.empty()); +} // ------------------- Full Solver Tests ------------------- From 8c3292fc6153073abf8dabfea7d40652a6accb9c Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Mon, 6 Oct 2025 07:29:18 +0000 Subject: [PATCH 16/26] Moved MIOPEN_INTERNALS_EXPORT macro to correct position for RunParameterPredictionModel --- .../ai_conv_3d_kernel_tuning_utils.hpp | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/projects/miopen/src/include/miopen/conv/heuristics/ai_conv_3d_kernel_tuning_utils.hpp b/projects/miopen/src/include/miopen/conv/heuristics/ai_conv_3d_kernel_tuning_utils.hpp index 2530f073baee..8c98d119a178 100644 --- a/projects/miopen/src/include/miopen/conv/heuristics/ai_conv_3d_kernel_tuning_utils.hpp +++ b/projects/miopen/src/include/miopen/conv/heuristics/ai_conv_3d_kernel_tuning_utils.hpp @@ -52,19 +52,19 @@ MIOPEN_INTERNALS_EXPORT void FillHeuristicKernels(const std::vector MIOPEN_INTERNALS_EXPORT std::vector GenerateSplitK(int max_split_k); // Main template implementation -template MIOPEN_INTERNALS_EXPORT - std::pair - RunParameterPredictionModel( - const miopen::ExecutionContext& ctx, - const miopen::conv::ProblemDescription& problem, - std::vector& valid_kernels, - int& index, - int& split_k, - std::string& kernel_id, - std::function(const miopen::conv::ProblemDescription&)> - fill_valid_kernels, - std::string solver_name) +template +std::pair +RunParameterPredictionModel( + const miopen::ExecutionContext& ctx, + const miopen::conv::ProblemDescription& problem, + std::vector& valid_kernels, + int& index, + int& split_k, + std::string& kernel_id, + std::function(const miopen::conv::ProblemDescription&)> + fill_valid_kernels, + std::string solver_name) { valid_kernels = fill_valid_kernels(problem); From af912497c6366a2cf4a4bfa6e3172de5f037a019 Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Mon, 6 Oct 2025 11:41:21 +0000 Subject: [PATCH 17/26] removed unneccesary MIOPEN_INTERNALS_EXPORT for RunParameterPrediction model --- .../miopen/conv/heuristics/ai_conv_3d_kernel_tuning_utils.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/projects/miopen/src/include/miopen/conv/heuristics/ai_conv_3d_kernel_tuning_utils.hpp b/projects/miopen/src/include/miopen/conv/heuristics/ai_conv_3d_kernel_tuning_utils.hpp index 8c98d119a178..76b4bcbcd511 100644 --- a/projects/miopen/src/include/miopen/conv/heuristics/ai_conv_3d_kernel_tuning_utils.hpp +++ b/projects/miopen/src/include/miopen/conv/heuristics/ai_conv_3d_kernel_tuning_utils.hpp @@ -52,7 +52,6 @@ MIOPEN_INTERNALS_EXPORT void FillHeuristicKernels(const std::vector MIOPEN_INTERNALS_EXPORT std::vector GenerateSplitK(int max_split_k); // Main template implementation -MIOPEN_INTERNALS_EXPORT template std::pair RunParameterPredictionModel( From 82ee7ccd4bb6268520ee9dbdb6bbaac24a5544f6 Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Thu, 9 Oct 2025 07:24:03 +0000 Subject: [PATCH 18/26] fixed potential namespace confusion caused by ProblemDescription. --- ...ip_implicit_gemm_3d_grouped_bwd_xdlops.cpp | 32 ++++++++--------- ...ip_implicit_gemm_3d_grouped_fwd_xdlops.cpp | 32 ++++++++--------- ...ip_implicit_gemm_3d_grouped_wrw_xdlops.cpp | 36 +++++++++---------- 3 files changed, 47 insertions(+), 53 deletions(-) diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp index f8df4b288468..dbb7d8c397bd 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp @@ -50,8 +50,6 @@ namespace miopen { namespace solver { namespace conv { -using ProblemDescription = miopen::conv::ProblemDescription; - #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL using InLayout = ck::tensor_layout::convolution::NDHWGC; @@ -129,7 +127,7 @@ namespace { template struct CKArgs { - CKArgs(const ProblemDescription& problem) + CKArgs(const miopen::conv::ProblemDescription& problem) { G = ProblemInterpreter::GetGroupCountG(problem); N = ProblemInterpreter::GetBatchN(problem); @@ -351,7 +349,7 @@ struct CKArgs }; template -std::vector FillValidKernelsByAlphaBeta(const ProblemDescription& problem) +std::vector FillValidKernelsByAlphaBeta(const miopen::conv::ProblemDescription& problem) { switch(problem.GetAlphaBetaCase()) { @@ -366,7 +364,7 @@ std::vector FillValidKernelsByAlphaBeta(const ProblemDescription& p } // namespace template -void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::Init(const ProblemDescription& problem) +void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::Init(const miopen::conv::ProblemDescription& problem) { valid_kernels = FillValidKernelsByAlphaBeta(problem); index = 0; @@ -375,7 +373,7 @@ void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::Init(const ProblemDescrip template bool PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::CheckIsSupportCKArgs( - const ProblemDescription& problem) const + const miopen::conv::ProblemDescription& problem) const { switch(problem.GetAlphaBetaCase()) { @@ -393,7 +391,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::CheckIsSupportCKArgs( template bool ConvHipImplicitGemm3DGroupBwdXdlops::CheckCKApplicability( - const ProblemDescription& problem) const + const miopen::conv::ProblemDescription& problem) const { switch(problem.GetAlphaBetaCase()) { @@ -405,7 +403,7 @@ bool ConvHipImplicitGemm3DGroupBwdXdlops::CheckCKApplicability( } void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::InitValidKernels( - const ProblemDescription& problem) + const miopen::conv::ProblemDescription& problem) { switch(problem.GetInDataType()) { @@ -419,7 +417,7 @@ void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::InitValidKernels( #endif void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::HeuristicInit( - const miopen::ExecutionContext& ctx, const ProblemDescription& problem) + const miopen::ExecutionContext& ctx, const miopen::conv::ProblemDescription& problem) { index = 0; kernel_id = "None"; @@ -499,7 +497,7 @@ void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::HeuristicInit( } bool PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::SetNextValue( - const ProblemDescription& problem) + const miopen::conv::ProblemDescription& problem) { if(valid_kernels.empty()) { @@ -524,7 +522,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::IsValidValue() const } bool PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::IsValid( - [[maybe_unused]] const ProblemDescription& problem) const + [[maybe_unused]] const miopen::conv::ProblemDescription& problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL switch(problem.GetInDataType()) @@ -551,7 +549,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::operator==( PerformanceConfigHipImplicitGemm3DGroupBwdXdlops ConvHipImplicitGemm3DGroupBwdXdlops::GetDefaultPerformanceConfig( - const ExecutionContext& ctx, const ProblemDescription& problem) const + const ExecutionContext& ctx, const miopen::conv::ProblemDescription& problem) const { PerformanceConfigHipImplicitGemm3DGroupBwdXdlops pp; pp.HeuristicInit(ctx, problem); @@ -560,7 +558,7 @@ ConvHipImplicitGemm3DGroupBwdXdlops::GetDefaultPerformanceConfig( bool ConvHipImplicitGemm3DGroupBwdXdlops::IsValidPerformanceConfig( const ExecutionContext&, - const ProblemDescription& problem, + const miopen::conv::ProblemDescription& problem, const PerformanceConfigHipImplicitGemm3DGroupBwdXdlops& config) const { return config.IsValid(problem); @@ -568,14 +566,14 @@ bool ConvHipImplicitGemm3DGroupBwdXdlops::IsValidPerformanceConfig( size_t ConvHipImplicitGemm3DGroupBwdXdlops::GetWorkspaceSize(const ExecutionContext&, - const ProblemDescription& problem) const + const miopen::conv::ProblemDescription& problem) const { return GetWorkspaceSizeLayoutTransformConv(problem); } PerformanceConfigHipImplicitGemm3DGroupBwdXdlops ConvHipImplicitGemm3DGroupBwdXdlops::Search(const ExecutionContext& ctx, - const ProblemDescription& problem, + const miopen::conv::ProblemDescription& problem, const AnyInvokeParams& invoke_ctx) const { return GenericSearch(*this, ctx, problem, invoke_ctx); @@ -583,7 +581,7 @@ ConvHipImplicitGemm3DGroupBwdXdlops::Search(const ExecutionContext& ctx, bool ConvHipImplicitGemm3DGroupBwdXdlops::IsApplicable( [[maybe_unused]] const ExecutionContext& ctx, - [[maybe_unused]] const ProblemDescription& problem) const + [[maybe_unused]] const miopen::conv::ProblemDescription& problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL if(env::disabled(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS)) @@ -623,7 +621,7 @@ bool ConvHipImplicitGemm3DGroupBwdXdlops::IsApplicable( ConvSolution ConvHipImplicitGemm3DGroupBwdXdlops::GetSolution( [[maybe_unused]] const ExecutionContext& ctx, - [[maybe_unused]] const ProblemDescription& problem, + [[maybe_unused]] const miopen::conv::ProblemDescription& problem, [[maybe_unused]] const PerformanceConfigHipImplicitGemm3DGroupBwdXdlops& config) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index 3fc182ccb8c7..b61ec9aa48e9 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -50,8 +50,6 @@ namespace miopen { namespace solver { namespace conv { -using ProblemDescription = miopen::conv::ProblemDescription; - #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL using InLayout = ck::tensor_layout::convolution::NDHWGC; @@ -127,7 +125,7 @@ namespace { template struct CKArgs { - CKArgs(const ProblemDescription& problem) + CKArgs(const miopen::conv::ProblemDescription& problem) { G = ProblemInterpreter::GetGroupCountG(problem); N = ProblemInterpreter::GetBatchN(problem); @@ -346,7 +344,7 @@ struct CKArgs }; template -std::vector FillValidKernelsByAlphaBeta(const ProblemDescription& problem) +std::vector FillValidKernelsByAlphaBeta(const miopen::conv::ProblemDescription& problem) { switch(problem.GetAlphaBetaCase()) { @@ -364,7 +362,7 @@ std::vector FillValidKernelsByAlphaBeta(const ProblemDescription& p } // namespace template -void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::Init(const ProblemDescription& problem) +void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::Init(const miopen::conv::ProblemDescription& problem) { valid_kernels = FillValidKernelsByAlphaBeta(problem); index = 0; @@ -373,7 +371,7 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::Init(const ProblemDescrip template bool PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::CheckIsSupportCKArgs( - const ProblemDescription& problem) const + const miopen::conv::ProblemDescription& problem) const { switch(problem.GetAlphaBetaCase()) { @@ -391,7 +389,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::CheckIsSupportCKArgs( template bool ConvHipImplicitGemm3DGroupFwdXdlops::CheckCKApplicability( - const ProblemDescription& problem) const + const miopen::conv::ProblemDescription& problem) const { switch(problem.GetAlphaBetaCase()) { @@ -403,7 +401,7 @@ bool ConvHipImplicitGemm3DGroupFwdXdlops::CheckCKApplicability( } void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::InitValidKernels( - const ProblemDescription& problem) + const miopen::conv::ProblemDescription& problem) { switch(problem.GetInDataType()) { @@ -417,7 +415,7 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::InitValidKernels( #endif void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::HeuristicInit( - const miopen::ExecutionContext& ctx, const ProblemDescription& problem) + const miopen::ExecutionContext& ctx, const miopen::conv::ProblemDescription& problem) { index = 0; kernel_id = "None"; @@ -621,7 +619,7 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::HeuristicInit( } bool PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::SetNextValue( - const ProblemDescription& problem) + const miopen::conv::ProblemDescription& problem) { if(valid_kernels.empty()) { @@ -646,7 +644,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::IsValidValue() const } bool PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::IsValid( - [[maybe_unused]] const ProblemDescription& problem) const + [[maybe_unused]] const miopen::conv::ProblemDescription& problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL switch(problem.GetInDataType()) @@ -673,7 +671,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::operator==( PerformanceConfigHipImplicitGemm3DGroupFwdXdlops ConvHipImplicitGemm3DGroupFwdXdlops::GetDefaultPerformanceConfig( - const ExecutionContext& ctx, const ProblemDescription& problem) const + const ExecutionContext& ctx, const miopen::conv::ProblemDescription& problem) const { PerformanceConfigHipImplicitGemm3DGroupFwdXdlops pp; pp.HeuristicInit(ctx, problem); @@ -682,7 +680,7 @@ ConvHipImplicitGemm3DGroupFwdXdlops::GetDefaultPerformanceConfig( bool ConvHipImplicitGemm3DGroupFwdXdlops::IsValidPerformanceConfig( const ExecutionContext&, - const ProblemDescription& problem, + const miopen::conv::ProblemDescription& problem, const PerformanceConfigHipImplicitGemm3DGroupFwdXdlops& config) const { return config.IsValid(problem); @@ -690,14 +688,14 @@ bool ConvHipImplicitGemm3DGroupFwdXdlops::IsValidPerformanceConfig( size_t ConvHipImplicitGemm3DGroupFwdXdlops::GetWorkspaceSize(const ExecutionContext&, - const ProblemDescription& problem) const + const miopen::conv::ProblemDescription& problem) const { return GetWorkspaceSizeLayoutTransformConv(problem); } PerformanceConfigHipImplicitGemm3DGroupFwdXdlops ConvHipImplicitGemm3DGroupFwdXdlops::Search(const ExecutionContext& ctx, - const ProblemDescription& problem, + const miopen::conv::ProblemDescription& problem, const AnyInvokeParams& invoke_ctx) const { return GenericSearch(*this, ctx, problem, invoke_ctx); @@ -705,7 +703,7 @@ ConvHipImplicitGemm3DGroupFwdXdlops::Search(const ExecutionContext& ctx, bool ConvHipImplicitGemm3DGroupFwdXdlops::IsApplicable( [[maybe_unused]] const ExecutionContext& ctx, - [[maybe_unused]] const ProblemDescription& problem) const + [[maybe_unused]] const miopen::conv::ProblemDescription& problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL if(env::disabled(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS)) @@ -773,7 +771,7 @@ float ConvHipImplicitGemm3DGroupFwdXdlops::GetWti( ConvSolution ConvHipImplicitGemm3DGroupFwdXdlops::GetSolution( [[maybe_unused]] const ExecutionContext& ctx, - [[maybe_unused]] const ProblemDescription& problem, + [[maybe_unused]] const miopen::conv::ProblemDescription& problem, [[maybe_unused]] const PerformanceConfigHipImplicitGemm3DGroupFwdXdlops& config) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp index 533ff1ed5f31..4492f87a5478 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp @@ -47,8 +47,6 @@ namespace miopen { namespace solver { namespace conv { -using ProblemDescription = miopen::conv::ProblemDescription; - #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL namespace { @@ -56,7 +54,7 @@ namespace { template struct CKArgs { - CKArgs(const ProblemDescription& problem) + CKArgs(const miopen::conv::ProblemDescription& problem) { G = ProblemInterpreter::GetGroupCountG(problem); N = ProblemInterpreter::GetBatchN(problem); @@ -310,7 +308,7 @@ struct CKArgs }; template -std::vector FillValidKernelsByAlphaBeta(const ProblemDescription& problem) +std::vector FillValidKernelsByAlphaBeta(const miopen::conv::ProblemDescription& problem) { switch(problem.GetAlphaBetaCase()) { @@ -328,7 +326,7 @@ std::vector FillValidKernelsByAlphaBeta(const ProblemDescription& p } // namespace template -void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::Init(const ProblemDescription& problem) +void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::Init(const miopen::conv::ProblemDescription& problem) { valid_kernels = FillValidKernelsByAlphaBeta(problem); index = 0; @@ -338,7 +336,7 @@ void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::Init(const ProblemDescrip template bool PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::CheckIsSupportCKArgs( - const ProblemDescription& problem) const + const miopen::conv::ProblemDescription& problem) const { switch(problem.GetAlphaBetaCase()) { @@ -356,7 +354,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::CheckIsSupportCKArgs( template bool ConvHipImplicitGemm3DGroupWrwXdlops::CheckCKApplicability( - const ProblemDescription& problem) const + const miopen::conv::ProblemDescription& problem) const { switch(problem.GetAlphaBetaCase()) { @@ -369,7 +367,7 @@ bool ConvHipImplicitGemm3DGroupWrwXdlops::CheckCKApplicability( } } void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::InitValidKernels( - const ProblemDescription& problem) + const miopen::conv::ProblemDescription& problem) { switch(problem.GetInDataType()) { @@ -383,7 +381,7 @@ void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::InitValidKernels( #endif void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::HeuristicInit( - const miopen::ExecutionContext& ctx, const ProblemDescription& problem) + const miopen::ExecutionContext& ctx, const miopen::conv::ProblemDescription& problem) { index = 0; kernel_id = "None"; @@ -468,7 +466,7 @@ void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::HeuristicInit( } bool PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::SetNextValue( - const ProblemDescription& problem) + const miopen::conv::ProblemDescription& problem) { #if MIOPEN_USE_COMPOSABLEKERNEL if(valid_kernels.empty()) @@ -507,7 +505,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::IsValidValue() const } bool PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::IsValid( - [[maybe_unused]] const ProblemDescription& problem) const + [[maybe_unused]] const miopen::conv::ProblemDescription& problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL switch(problem.GetInDataType()) @@ -534,7 +532,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::operator==( PerformanceConfigHipImplicitGemm3DGroupWrwXdlops ConvHipImplicitGemm3DGroupWrwXdlops::GetDefaultPerformanceConfig( - const ExecutionContext& ctx, const ProblemDescription& problem) const + const ExecutionContext& ctx, const miopen::conv::ProblemDescription& problem) const { PerformanceConfigHipImplicitGemm3DGroupWrwXdlops pp; pp.HeuristicInit(ctx, problem); @@ -543,7 +541,7 @@ ConvHipImplicitGemm3DGroupWrwXdlops::GetDefaultPerformanceConfig( bool ConvHipImplicitGemm3DGroupWrwXdlops::IsValidPerformanceConfig( const ExecutionContext&, - const ProblemDescription& problem, + const miopen::conv::ProblemDescription& problem, const PerformanceConfigHipImplicitGemm3DGroupWrwXdlops& config) const { return config.IsValid(problem); @@ -551,7 +549,7 @@ bool ConvHipImplicitGemm3DGroupWrwXdlops::IsValidPerformanceConfig( template size_t -ConvHipImplicitGemm3DGroupWrwXdlops::GetCKMaxWorkspaceSize(const ProblemDescription& problem) const +ConvHipImplicitGemm3DGroupWrwXdlops::GetCKMaxWorkspaceSize(const miopen::conv::ProblemDescription& problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL switch(problem.GetAlphaBetaCase()) @@ -572,7 +570,7 @@ ConvHipImplicitGemm3DGroupWrwXdlops::GetCKMaxWorkspaceSize(const ProblemDescript } size_t -ConvHipImplicitGemm3DGroupWrwXdlops::GetCKMaxWorkspaceSize(const ProblemDescription& problem) const +ConvHipImplicitGemm3DGroupWrwXdlops::GetCKMaxWorkspaceSize(const miopen::conv::ProblemDescription& problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL switch(problem.GetInDataType()) @@ -593,7 +591,7 @@ ConvHipImplicitGemm3DGroupWrwXdlops::GetCKMaxWorkspaceSize(const ProblemDescript size_t ConvHipImplicitGemm3DGroupWrwXdlops::GetWorkspaceSize(const ExecutionContext&, - const ProblemDescription& problem) const + const miopen::conv::ProblemDescription& problem) const { auto ck_ws_size = GetCKMaxWorkspaceSize(problem); return GetWorkspaceSizeLayoutTransformConv(problem, ck_ws_size); @@ -601,7 +599,7 @@ ConvHipImplicitGemm3DGroupWrwXdlops::GetWorkspaceSize(const ExecutionContext&, PerformanceConfigHipImplicitGemm3DGroupWrwXdlops ConvHipImplicitGemm3DGroupWrwXdlops::Search(const ExecutionContext& ctx, - const ProblemDescription& problem, + const miopen::conv::ProblemDescription& problem, const AnyInvokeParams& invoke_ctx) const { return GenericSearch(*this, ctx, problem, invoke_ctx); @@ -609,7 +607,7 @@ ConvHipImplicitGemm3DGroupWrwXdlops::Search(const ExecutionContext& ctx, bool ConvHipImplicitGemm3DGroupWrwXdlops::IsApplicable( [[maybe_unused]] const ExecutionContext& ctx, - [[maybe_unused]] const ProblemDescription& problem) const + [[maybe_unused]] const miopen::conv::ProblemDescription& problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL if(env::disabled(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_WRW_XDLOPS)) @@ -652,7 +650,7 @@ bool ConvHipImplicitGemm3DGroupWrwXdlops::IsApplicable( ConvSolution ConvHipImplicitGemm3DGroupWrwXdlops::GetSolution( [[maybe_unused]] const ExecutionContext& ctx, - [[maybe_unused]] const ProblemDescription& problem, + [[maybe_unused]] const miopen::conv::ProblemDescription& problem, [[maybe_unused]] const PerformanceConfigHipImplicitGemm3DGroupWrwXdlops& config) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL From 7272d4ade8258aeeba730201e965571497458ea6 Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Thu, 9 Oct 2025 08:40:39 +0000 Subject: [PATCH 19/26] More namespace fixes for miopen::conv::ProblemDescription. Moved env var to top of file to avoid namespace conflicts there. --- ...ip_implicit_gemm_3d_grouped_bwd_xdlops.cpp | 48 ++++++++-------- ...ip_implicit_gemm_3d_grouped_fwd_xdlops.cpp | 51 +++++++++-------- ...ip_implicit_gemm_3d_grouped_wrw_xdlops.cpp | 55 ++++++++++--------- 3 files changed, 81 insertions(+), 73 deletions(-) diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp index dbb7d8c397bd..5bd84c271b4c 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp @@ -27,8 +27,12 @@ #include #include -#include #include + +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS_AI_HEUR) + +#include #include #include #include @@ -43,9 +47,6 @@ #endif #include -MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS) -MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS_AI_HEUR) - namespace miopen { namespace solver { namespace conv { @@ -127,7 +128,7 @@ namespace { template struct CKArgs { - CKArgs(const miopen::conv::ProblemDescription& problem) + CKArgs(const ::miopen::conv::ProblemDescription& problem) { G = ProblemInterpreter::GetGroupCountG(problem); N = ProblemInterpreter::GetBatchN(problem); @@ -349,7 +350,8 @@ struct CKArgs }; template -std::vector FillValidKernelsByAlphaBeta(const miopen::conv::ProblemDescription& problem) +std::vector +FillValidKernelsByAlphaBeta(const ::miopen::conv::ProblemDescription& problem) { switch(problem.GetAlphaBetaCase()) { @@ -364,7 +366,8 @@ std::vector FillValidKernelsByAlphaBeta(const miopen::conv::Problem } // namespace template -void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::Init(const miopen::conv::ProblemDescription& problem) +void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::Init( + const ::miopen::conv::ProblemDescription& problem) { valid_kernels = FillValidKernelsByAlphaBeta(problem); index = 0; @@ -373,7 +376,7 @@ void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::Init(const miopen::conv:: template bool PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::CheckIsSupportCKArgs( - const miopen::conv::ProblemDescription& problem) const + const ::miopen::conv::ProblemDescription& problem) const { switch(problem.GetAlphaBetaCase()) { @@ -391,7 +394,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::CheckIsSupportCKArgs( template bool ConvHipImplicitGemm3DGroupBwdXdlops::CheckCKApplicability( - const miopen::conv::ProblemDescription& problem) const + const ::miopen::conv::ProblemDescription& problem) const { switch(problem.GetAlphaBetaCase()) { @@ -403,7 +406,7 @@ bool ConvHipImplicitGemm3DGroupBwdXdlops::CheckCKApplicability( } void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::InitValidKernels( - const miopen::conv::ProblemDescription& problem) + const ::miopen::conv::ProblemDescription& problem) { switch(problem.GetInDataType()) { @@ -417,7 +420,7 @@ void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::InitValidKernels( #endif void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::HeuristicInit( - const miopen::ExecutionContext& ctx, const miopen::conv::ProblemDescription& problem) + const miopen::ExecutionContext& ctx, const ::miopen::conv::ProblemDescription& problem) { index = 0; kernel_id = "None"; @@ -440,7 +443,7 @@ void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::HeuristicInit( auto run_ai_heuristics = [&](auto CKDataType) { using T = decltype(CKDataType); auto fill_valid_kernels = - [=](const miopen::conv::ProblemDescription& problem) -> std::vector { + [=](const ::miopen::conv::ProblemDescription& problem) -> std::vector { return FillValidKernelsByAlphaBeta(problem); }; return miopen::solver::conv::RunParameterPredictionModel(ctx, @@ -497,7 +500,7 @@ void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::HeuristicInit( } bool PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::SetNextValue( - const miopen::conv::ProblemDescription& problem) + const ::miopen::conv::ProblemDescription& problem) { if(valid_kernels.empty()) { @@ -522,7 +525,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::IsValidValue() const } bool PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::IsValid( - [[maybe_unused]] const miopen::conv::ProblemDescription& problem) const + [[maybe_unused]] const ::miopen::conv::ProblemDescription& problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL switch(problem.GetInDataType()) @@ -549,7 +552,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::operator==( PerformanceConfigHipImplicitGemm3DGroupBwdXdlops ConvHipImplicitGemm3DGroupBwdXdlops::GetDefaultPerformanceConfig( - const ExecutionContext& ctx, const miopen::conv::ProblemDescription& problem) const + const ExecutionContext& ctx, const ::miopen::conv::ProblemDescription& problem) const { PerformanceConfigHipImplicitGemm3DGroupBwdXdlops pp; pp.HeuristicInit(ctx, problem); @@ -558,22 +561,21 @@ ConvHipImplicitGemm3DGroupBwdXdlops::GetDefaultPerformanceConfig( bool ConvHipImplicitGemm3DGroupBwdXdlops::IsValidPerformanceConfig( const ExecutionContext&, - const miopen::conv::ProblemDescription& problem, + const ::miopen::conv::ProblemDescription& problem, const PerformanceConfigHipImplicitGemm3DGroupBwdXdlops& config) const { return config.IsValid(problem); } -size_t -ConvHipImplicitGemm3DGroupBwdXdlops::GetWorkspaceSize(const ExecutionContext&, - const miopen::conv::ProblemDescription& problem) const +size_t ConvHipImplicitGemm3DGroupBwdXdlops::GetWorkspaceSize( + const ExecutionContext&, const ::miopen::conv::ProblemDescription& problem) const { return GetWorkspaceSizeLayoutTransformConv(problem); } PerformanceConfigHipImplicitGemm3DGroupBwdXdlops ConvHipImplicitGemm3DGroupBwdXdlops::Search(const ExecutionContext& ctx, - const miopen::conv::ProblemDescription& problem, + const ::miopen::conv::ProblemDescription& problem, const AnyInvokeParams& invoke_ctx) const { return GenericSearch(*this, ctx, problem, invoke_ctx); @@ -581,7 +583,7 @@ ConvHipImplicitGemm3DGroupBwdXdlops::Search(const ExecutionContext& ctx, bool ConvHipImplicitGemm3DGroupBwdXdlops::IsApplicable( [[maybe_unused]] const ExecutionContext& ctx, - [[maybe_unused]] const miopen::conv::ProblemDescription& problem) const + [[maybe_unused]] const ::miopen::conv::ProblemDescription& problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL if(env::disabled(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS)) @@ -621,7 +623,7 @@ bool ConvHipImplicitGemm3DGroupBwdXdlops::IsApplicable( ConvSolution ConvHipImplicitGemm3DGroupBwdXdlops::GetSolution( [[maybe_unused]] const ExecutionContext& ctx, - [[maybe_unused]] const miopen::conv::ProblemDescription& problem, + [[maybe_unused]] const ::miopen::conv::ProblemDescription& problem, [[maybe_unused]] const PerformanceConfigHipImplicitGemm3DGroupBwdXdlops& config) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL @@ -686,7 +688,7 @@ ConvSolution ConvHipImplicitGemm3DGroupBwdXdlops::GetSolution( #if !(MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL) void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::InitValidKernels( - const miopen::conv::ProblemDescription&) + const ::miopen::conv::ProblemDescription&) { // No-op stub for non-CK builds } diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index b61ec9aa48e9..36dcc16d2455 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -27,8 +27,13 @@ #include #include #include -#include #include + +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS) +MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_IDX_OVERRIDE); +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_AI_HEUR) + +#include #include #include #include @@ -42,9 +47,6 @@ #include #endif #include -MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS) -MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_IDX_OVERRIDE); -MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_AI_HEUR) namespace miopen { namespace solver { @@ -125,7 +127,7 @@ namespace { template struct CKArgs { - CKArgs(const miopen::conv::ProblemDescription& problem) + CKArgs(const ::miopen::conv::ProblemDescription& problem) { G = ProblemInterpreter::GetGroupCountG(problem); N = ProblemInterpreter::GetBatchN(problem); @@ -344,7 +346,8 @@ struct CKArgs }; template -std::vector FillValidKernelsByAlphaBeta(const miopen::conv::ProblemDescription& problem) +std::vector +FillValidKernelsByAlphaBeta(const ::miopen::conv::ProblemDescription& problem) { switch(problem.GetAlphaBetaCase()) { @@ -362,7 +365,8 @@ std::vector FillValidKernelsByAlphaBeta(const miopen::conv::Problem } // namespace template -void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::Init(const miopen::conv::ProblemDescription& problem) +void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::Init( + const ::miopen::conv::ProblemDescription& problem) { valid_kernels = FillValidKernelsByAlphaBeta(problem); index = 0; @@ -371,7 +375,7 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::Init(const miopen::conv:: template bool PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::CheckIsSupportCKArgs( - const miopen::conv::ProblemDescription& problem) const + const ::miopen::conv::ProblemDescription& problem) const { switch(problem.GetAlphaBetaCase()) { @@ -389,7 +393,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::CheckIsSupportCKArgs( template bool ConvHipImplicitGemm3DGroupFwdXdlops::CheckCKApplicability( - const miopen::conv::ProblemDescription& problem) const + const ::miopen::conv::ProblemDescription& problem) const { switch(problem.GetAlphaBetaCase()) { @@ -401,7 +405,7 @@ bool ConvHipImplicitGemm3DGroupFwdXdlops::CheckCKApplicability( } void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::InitValidKernels( - const miopen::conv::ProblemDescription& problem) + const ::miopen::conv::ProblemDescription& problem) { switch(problem.GetInDataType()) { @@ -415,7 +419,7 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::InitValidKernels( #endif void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::HeuristicInit( - const miopen::ExecutionContext& ctx, const miopen::conv::ProblemDescription& problem) + const miopen::ExecutionContext& ctx, const ::miopen::conv::ProblemDescription& problem) { index = 0; kernel_id = "None"; @@ -563,7 +567,7 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::HeuristicInit( auto run_ai_heuristics = [&](auto CKDataType) { using T = decltype(CKDataType); auto fill_valid_kernels = - [=](const miopen::conv::ProblemDescription& problem) -> std::vector { + [=](const ::miopen::conv::ProblemDescription& problem) -> std::vector { return FillValidKernelsByAlphaBeta(problem); }; return miopen::solver::conv::RunParameterPredictionModel(ctx, @@ -619,7 +623,7 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::HeuristicInit( } bool PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::SetNextValue( - const miopen::conv::ProblemDescription& problem) + const ::miopen::conv::ProblemDescription& problem) { if(valid_kernels.empty()) { @@ -644,7 +648,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::IsValidValue() const } bool PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::IsValid( - [[maybe_unused]] const miopen::conv::ProblemDescription& problem) const + [[maybe_unused]] const ::miopen::conv::ProblemDescription& problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL switch(problem.GetInDataType()) @@ -671,7 +675,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::operator==( PerformanceConfigHipImplicitGemm3DGroupFwdXdlops ConvHipImplicitGemm3DGroupFwdXdlops::GetDefaultPerformanceConfig( - const ExecutionContext& ctx, const miopen::conv::ProblemDescription& problem) const + const ExecutionContext& ctx, const ::miopen::conv::ProblemDescription& problem) const { PerformanceConfigHipImplicitGemm3DGroupFwdXdlops pp; pp.HeuristicInit(ctx, problem); @@ -680,22 +684,21 @@ ConvHipImplicitGemm3DGroupFwdXdlops::GetDefaultPerformanceConfig( bool ConvHipImplicitGemm3DGroupFwdXdlops::IsValidPerformanceConfig( const ExecutionContext&, - const miopen::conv::ProblemDescription& problem, + const ::miopen::conv::ProblemDescription& problem, const PerformanceConfigHipImplicitGemm3DGroupFwdXdlops& config) const { return config.IsValid(problem); } -size_t -ConvHipImplicitGemm3DGroupFwdXdlops::GetWorkspaceSize(const ExecutionContext&, - const miopen::conv::ProblemDescription& problem) const +size_t ConvHipImplicitGemm3DGroupFwdXdlops::GetWorkspaceSize( + const ExecutionContext&, const ::miopen::conv::ProblemDescription& problem) const { return GetWorkspaceSizeLayoutTransformConv(problem); } PerformanceConfigHipImplicitGemm3DGroupFwdXdlops ConvHipImplicitGemm3DGroupFwdXdlops::Search(const ExecutionContext& ctx, - const miopen::conv::ProblemDescription& problem, + const ::miopen::conv::ProblemDescription& problem, const AnyInvokeParams& invoke_ctx) const { return GenericSearch(*this, ctx, problem, invoke_ctx); @@ -703,7 +706,7 @@ ConvHipImplicitGemm3DGroupFwdXdlops::Search(const ExecutionContext& ctx, bool ConvHipImplicitGemm3DGroupFwdXdlops::IsApplicable( [[maybe_unused]] const ExecutionContext& ctx, - [[maybe_unused]] const miopen::conv::ProblemDescription& problem) const + [[maybe_unused]] const ::miopen::conv::ProblemDescription& problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL if(env::disabled(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS)) @@ -742,7 +745,7 @@ bool ConvHipImplicitGemm3DGroupFwdXdlops::IsApplicable( } float ConvHipImplicitGemm3DGroupFwdXdlops::GetWti( - const ExecutionContext&, const miopen::conv::ProblemDescription& problem) const + const ExecutionContext&, const ::miopen::conv::ProblemDescription& problem) const { decltype(auto) xDesc = problem.GetIn(); decltype(auto) wDesc = problem.GetWeights(); @@ -771,7 +774,7 @@ float ConvHipImplicitGemm3DGroupFwdXdlops::GetWti( ConvSolution ConvHipImplicitGemm3DGroupFwdXdlops::GetSolution( [[maybe_unused]] const ExecutionContext& ctx, - [[maybe_unused]] const miopen::conv::ProblemDescription& problem, + [[maybe_unused]] const ::miopen::conv::ProblemDescription& problem, [[maybe_unused]] const PerformanceConfigHipImplicitGemm3DGroupFwdXdlops& config) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL @@ -836,7 +839,7 @@ ConvSolution ConvHipImplicitGemm3DGroupFwdXdlops::GetSolution( #if !(MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL) void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::InitValidKernels( - const miopen::conv::ProblemDescription&) + const ::miopen::conv::ProblemDescription&) { // No-op stub for non-CK builds } diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp index 4492f87a5478..bedba5d3142a 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp @@ -27,8 +27,12 @@ #include #include -#include #include + +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_WRW_XDLOPS) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_WRW_XDLOPS_AI_HEUR) + +#include #include #include #include @@ -40,8 +44,6 @@ #endif #include #include -MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_WRW_XDLOPS) -MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_WRW_XDLOPS_AI_HEUR) namespace miopen { namespace solver { @@ -54,7 +56,7 @@ namespace { template struct CKArgs { - CKArgs(const miopen::conv::ProblemDescription& problem) + CKArgs(const ::miopen::conv::ProblemDescription& problem) { G = ProblemInterpreter::GetGroupCountG(problem); N = ProblemInterpreter::GetBatchN(problem); @@ -308,7 +310,8 @@ struct CKArgs }; template -std::vector FillValidKernelsByAlphaBeta(const miopen::conv::ProblemDescription& problem) +std::vector +FillValidKernelsByAlphaBeta(const ::miopen::conv::ProblemDescription& problem) { switch(problem.GetAlphaBetaCase()) { @@ -326,7 +329,8 @@ std::vector FillValidKernelsByAlphaBeta(const miopen::conv::Problem } // namespace template -void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::Init(const miopen::conv::ProblemDescription& problem) +void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::Init( + const ::miopen::conv::ProblemDescription& problem) { valid_kernels = FillValidKernelsByAlphaBeta(problem); index = 0; @@ -336,7 +340,7 @@ void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::Init(const miopen::conv:: template bool PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::CheckIsSupportCKArgs( - const miopen::conv::ProblemDescription& problem) const + const ::miopen::conv::ProblemDescription& problem) const { switch(problem.GetAlphaBetaCase()) { @@ -354,7 +358,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::CheckIsSupportCKArgs( template bool ConvHipImplicitGemm3DGroupWrwXdlops::CheckCKApplicability( - const miopen::conv::ProblemDescription& problem) const + const ::miopen::conv::ProblemDescription& problem) const { switch(problem.GetAlphaBetaCase()) { @@ -367,7 +371,7 @@ bool ConvHipImplicitGemm3DGroupWrwXdlops::CheckCKApplicability( } } void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::InitValidKernels( - const miopen::conv::ProblemDescription& problem) + const ::miopen::conv::ProblemDescription& problem) { switch(problem.GetInDataType()) { @@ -381,7 +385,7 @@ void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::InitValidKernels( #endif void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::HeuristicInit( - const miopen::ExecutionContext& ctx, const miopen::conv::ProblemDescription& problem) + const miopen::ExecutionContext& ctx, const ::miopen::conv::ProblemDescription& problem) { index = 0; kernel_id = "None"; @@ -404,7 +408,7 @@ void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::HeuristicInit( auto run_ai_heuristics = [&](auto CKDataType) { using T = decltype(CKDataType); auto fill_valid_kernels = - [=](const miopen::conv::ProblemDescription& problem) -> std::vector { + [=](const ::miopen::conv::ProblemDescription& problem) -> std::vector { return FillValidKernelsByAlphaBeta(problem); }; return miopen::solver::conv::RunParameterPredictionModel(ctx, @@ -466,7 +470,7 @@ void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::HeuristicInit( } bool PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::SetNextValue( - const miopen::conv::ProblemDescription& problem) + const ::miopen::conv::ProblemDescription& problem) { #if MIOPEN_USE_COMPOSABLEKERNEL if(valid_kernels.empty()) @@ -505,7 +509,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::IsValidValue() const } bool PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::IsValid( - [[maybe_unused]] const miopen::conv::ProblemDescription& problem) const + [[maybe_unused]] const ::miopen::conv::ProblemDescription& problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL switch(problem.GetInDataType()) @@ -532,7 +536,7 @@ bool PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::operator==( PerformanceConfigHipImplicitGemm3DGroupWrwXdlops ConvHipImplicitGemm3DGroupWrwXdlops::GetDefaultPerformanceConfig( - const ExecutionContext& ctx, const miopen::conv::ProblemDescription& problem) const + const ExecutionContext& ctx, const ::miopen::conv::ProblemDescription& problem) const { PerformanceConfigHipImplicitGemm3DGroupWrwXdlops pp; pp.HeuristicInit(ctx, problem); @@ -541,15 +545,15 @@ ConvHipImplicitGemm3DGroupWrwXdlops::GetDefaultPerformanceConfig( bool ConvHipImplicitGemm3DGroupWrwXdlops::IsValidPerformanceConfig( const ExecutionContext&, - const miopen::conv::ProblemDescription& problem, + const ::miopen::conv::ProblemDescription& problem, const PerformanceConfigHipImplicitGemm3DGroupWrwXdlops& config) const { return config.IsValid(problem); } template -size_t -ConvHipImplicitGemm3DGroupWrwXdlops::GetCKMaxWorkspaceSize(const miopen::conv::ProblemDescription& problem) const +size_t ConvHipImplicitGemm3DGroupWrwXdlops::GetCKMaxWorkspaceSize( + const ::miopen::conv::ProblemDescription& problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL switch(problem.GetAlphaBetaCase()) @@ -569,8 +573,8 @@ ConvHipImplicitGemm3DGroupWrwXdlops::GetCKMaxWorkspaceSize(const miopen::conv::P #endif } -size_t -ConvHipImplicitGemm3DGroupWrwXdlops::GetCKMaxWorkspaceSize(const miopen::conv::ProblemDescription& problem) const +size_t ConvHipImplicitGemm3DGroupWrwXdlops::GetCKMaxWorkspaceSize( + const ::miopen::conv::ProblemDescription& problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL switch(problem.GetInDataType()) @@ -589,9 +593,8 @@ ConvHipImplicitGemm3DGroupWrwXdlops::GetCKMaxWorkspaceSize(const miopen::conv::P return 0; // other types not applicable for this solver } -size_t -ConvHipImplicitGemm3DGroupWrwXdlops::GetWorkspaceSize(const ExecutionContext&, - const miopen::conv::ProblemDescription& problem) const +size_t ConvHipImplicitGemm3DGroupWrwXdlops::GetWorkspaceSize( + const ExecutionContext&, const ::miopen::conv::ProblemDescription& problem) const { auto ck_ws_size = GetCKMaxWorkspaceSize(problem); return GetWorkspaceSizeLayoutTransformConv(problem, ck_ws_size); @@ -599,7 +602,7 @@ ConvHipImplicitGemm3DGroupWrwXdlops::GetWorkspaceSize(const ExecutionContext&, PerformanceConfigHipImplicitGemm3DGroupWrwXdlops ConvHipImplicitGemm3DGroupWrwXdlops::Search(const ExecutionContext& ctx, - const miopen::conv::ProblemDescription& problem, + const ::miopen::conv::ProblemDescription& problem, const AnyInvokeParams& invoke_ctx) const { return GenericSearch(*this, ctx, problem, invoke_ctx); @@ -607,7 +610,7 @@ ConvHipImplicitGemm3DGroupWrwXdlops::Search(const ExecutionContext& ctx, bool ConvHipImplicitGemm3DGroupWrwXdlops::IsApplicable( [[maybe_unused]] const ExecutionContext& ctx, - [[maybe_unused]] const miopen::conv::ProblemDescription& problem) const + [[maybe_unused]] const ::miopen::conv::ProblemDescription& problem) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL if(env::disabled(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_WRW_XDLOPS)) @@ -650,7 +653,7 @@ bool ConvHipImplicitGemm3DGroupWrwXdlops::IsApplicable( ConvSolution ConvHipImplicitGemm3DGroupWrwXdlops::GetSolution( [[maybe_unused]] const ExecutionContext& ctx, - [[maybe_unused]] const miopen::conv::ProblemDescription& problem, + [[maybe_unused]] const ::miopen::conv::ProblemDescription& problem, [[maybe_unused]] const PerformanceConfigHipImplicitGemm3DGroupWrwXdlops& config) const { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL @@ -715,7 +718,7 @@ ConvSolution ConvHipImplicitGemm3DGroupWrwXdlops::GetSolution( #if !(MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL) void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::InitValidKernels( - const miopen::conv::ProblemDescription&) + const ::miopen::conv::ProblemDescription&) { // No-op stub for non-CK builds } From 7eece463c78cf7dedd185fc5b1472f9ff4cf2685 Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Thu, 9 Oct 2025 13:12:28 +0000 Subject: [PATCH 20/26] added #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL guards around relevant functions --- .../conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp | 10 ++-------- .../conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp | 2 ++ .../conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp | 3 +++ 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp index 5bd84c271b4c..3ee56b4c74a3 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp @@ -365,6 +365,7 @@ FillValidKernelsByAlphaBeta(const ::miopen::conv::ProblemDescription& problem) } } // namespace +#if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL template void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::Init( const ::miopen::conv::ProblemDescription& problem) @@ -404,6 +405,7 @@ bool ConvHipImplicitGemm3DGroupBwdXdlops::CheckCKApplicability( default: return IsCKApplicable, CKArgs>(problem); } } +#endif // # MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::InitValidKernels( const ::miopen::conv::ProblemDescription& problem) @@ -686,14 +688,6 @@ ConvSolution ConvHipImplicitGemm3DGroupBwdXdlops::GetSolution( #endif } -#if !(MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL) -void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::InitValidKernels( - const ::miopen::conv::ProblemDescription&) -{ - // No-op stub for non-CK builds -} -#endif - } // namespace conv } // namespace solver } // namespace miopen diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index 36dcc16d2455..dae16de7d130 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -364,6 +364,7 @@ FillValidKernelsByAlphaBeta(const ::miopen::conv::ProblemDescription& problem) } } // namespace +#if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL template void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::Init( const ::miopen::conv::ProblemDescription& problem) @@ -403,6 +404,7 @@ bool ConvHipImplicitGemm3DGroupFwdXdlops::CheckCKApplicability( default: return IsCKApplicable, CKArgs>(problem); } } +#endif // MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::InitValidKernels( const ::miopen::conv::ProblemDescription& problem) diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp index bedba5d3142a..7c62ab86d203 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp @@ -328,6 +328,7 @@ FillValidKernelsByAlphaBeta(const ::miopen::conv::ProblemDescription& problem) } } // namespace +#if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL template void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::Init( const ::miopen::conv::ProblemDescription& problem) @@ -370,6 +371,8 @@ bool ConvHipImplicitGemm3DGroupWrwXdlops::CheckCKApplicability( return IsCKApplicable, CKArgs>(problem); } } +#endif // # MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL + void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::InitValidKernels( const ::miopen::conv::ProblemDescription& problem) { From fce70e0d37b4d7acd6ada993b515d83edccaef43 Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Thu, 9 Oct 2025 14:00:03 +0000 Subject: [PATCH 21/26] expanded MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL guards --- ...ip_implicit_gemm_3d_grouped_bwd_xdlops.cpp | 4 +--- ...ip_implicit_gemm_3d_grouped_fwd_xdlops.cpp | 24 ++++++------------- ...ip_implicit_gemm_3d_grouped_wrw_xdlops.cpp | 12 +--------- 3 files changed, 9 insertions(+), 31 deletions(-) diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp index 3ee56b4c74a3..364efaf622d2 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp @@ -365,7 +365,6 @@ FillValidKernelsByAlphaBeta(const ::miopen::conv::ProblemDescription& problem) } } // namespace -#if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL template void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::Init( const ::miopen::conv::ProblemDescription& problem) @@ -405,7 +404,6 @@ bool ConvHipImplicitGemm3DGroupBwdXdlops::CheckCKApplicability( default: return IsCKApplicable, CKArgs>(problem); } } -#endif // # MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::InitValidKernels( const ::miopen::conv::ProblemDescription& problem) @@ -419,7 +417,7 @@ void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::InitValidKernels( default: break; // Unsupported data types - valid_kernels remains empty } } -#endif +#endif // # MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::HeuristicInit( const miopen::ExecutionContext& ctx, const ::miopen::conv::ProblemDescription& problem) diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index dae16de7d130..eeca559e6403 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -352,19 +352,18 @@ FillValidKernelsByAlphaBeta(const ::miopen::conv::ProblemDescription& problem) switch(problem.GetAlphaBetaCase()) { case BILINEAR: - return miopen::solver::FillValidKernelsIDs, - CKArgs>(problem); + return ::miopen::solver::FillValidKernelsIDs, + CKArgs>(problem); case SCALE: - return miopen::solver::FillValidKernelsIDs, - CKArgs>(problem); + return ::miopen::solver::FillValidKernelsIDs, + CKArgs>(problem); default: - return miopen::solver::FillValidKernelsIDs, - CKArgs>(problem); + return ::miopen::solver::FillValidKernelsIDs, + CKArgs>(problem); } } } // namespace -#if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL template void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::Init( const ::miopen::conv::ProblemDescription& problem) @@ -404,7 +403,6 @@ bool ConvHipImplicitGemm3DGroupFwdXdlops::CheckCKApplicability( default: return IsCKApplicable, CKArgs>(problem); } } -#endif // MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::InitValidKernels( const ::miopen::conv::ProblemDescription& problem) @@ -418,7 +416,7 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::InitValidKernels( default: break; // Unsupported data types - valid_kernels remains empty } } -#endif +#endif // # MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::HeuristicInit( const miopen::ExecutionContext& ctx, const ::miopen::conv::ProblemDescription& problem) @@ -839,14 +837,6 @@ ConvSolution ConvHipImplicitGemm3DGroupFwdXdlops::GetSolution( #endif } -#if !(MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL) -void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::InitValidKernels( - const ::miopen::conv::ProblemDescription&) -{ - // No-op stub for non-CK builds -} -#endif - } // namespace conv } // namespace solver } // namespace miopen diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp index 7c62ab86d203..2829c45b2a0d 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp @@ -328,7 +328,6 @@ FillValidKernelsByAlphaBeta(const ::miopen::conv::ProblemDescription& problem) } } // namespace -#if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL template void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::Init( const ::miopen::conv::ProblemDescription& problem) @@ -371,7 +370,6 @@ bool ConvHipImplicitGemm3DGroupWrwXdlops::CheckCKApplicability( return IsCKApplicable, CKArgs>(problem); } } -#endif // # MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::InitValidKernels( const ::miopen::conv::ProblemDescription& problem) @@ -385,8 +383,8 @@ void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::InitValidKernels( default: break; // Unsupported data types - valid_kernels remains empty } } -#endif +#endif // # MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::HeuristicInit( const miopen::ExecutionContext& ctx, const ::miopen::conv::ProblemDescription& problem) { @@ -719,14 +717,6 @@ ConvSolution ConvHipImplicitGemm3DGroupWrwXdlops::GetSolution( #endif } -#if !(MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL) -void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::InitValidKernels( - const ::miopen::conv::ProblemDescription&) -{ - // No-op stub for non-CK builds -} -#endif - } // namespace conv } // namespace solver } // namespace miopen From 8273db4df738612a3fdd4d1245198211718cbdfb Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Fri, 10 Oct 2025 10:05:03 +0000 Subject: [PATCH 22/26] Added different guards to save Azure CI build (=without AI heuristics) --- .../conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp | 2 ++ .../conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp | 2 ++ .../conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp | 2 ++ 3 files changed, 6 insertions(+) diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp index 364efaf622d2..74a75202bcd4 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp @@ -41,10 +41,12 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS_AI #include #include #include +#if MIOPEN_ENABLE_AI_KERNEL_TUNING #include #include #include #endif +#endif #include namespace miopen { diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index eeca559e6403..470eec4609d2 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -42,10 +42,12 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_AI #include #include #include "ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp" +#if MIOPEN_ENABLE_AI_KERNEL_TUNING #include #include #include #endif +#endif #include namespace miopen { diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp index 2829c45b2a0d..6f767ddf0133 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp @@ -38,10 +38,12 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_WRW_XDLOPS_AI #include #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL #include +#if MIOPEN_ENABLE_AI_KERNEL_TUNING #include #include #include #endif +#endif #include #include From dc2d70fc678468b859c2a1d8cbb52e078530c741 Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Fri, 10 Oct 2025 11:05:27 +0000 Subject: [PATCH 23/26] Reverted changes to MIOPEN_BACKEND_HIPP guards --- ...ip_implicit_gemm_3d_grouped_bwd_xdlops.cpp | 10 ++++++++- ...ip_implicit_gemm_3d_grouped_fwd_xdlops.cpp | 22 +++++++++++++------ ...ip_implicit_gemm_3d_grouped_wrw_xdlops.cpp | 11 ++++++++-- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp index 74a75202bcd4..98ebcbacfb5a 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp @@ -419,7 +419,7 @@ void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::InitValidKernels( default: break; // Unsupported data types - valid_kernels remains empty } } -#endif // # MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL +#endif void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::HeuristicInit( const miopen::ExecutionContext& ctx, const ::miopen::conv::ProblemDescription& problem) @@ -688,6 +688,14 @@ ConvSolution ConvHipImplicitGemm3DGroupBwdXdlops::GetSolution( #endif } +#if !(MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL) +void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::InitValidKernels( + const ::miopen::conv::ProblemDescription&) +{ + // No-op stub for non-CK builds +} +#endif + } // namespace conv } // namespace solver } // namespace miopen diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index 470eec4609d2..eef231a7abdd 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -354,14 +354,14 @@ FillValidKernelsByAlphaBeta(const ::miopen::conv::ProblemDescription& problem) switch(problem.GetAlphaBetaCase()) { case BILINEAR: - return ::miopen::solver::FillValidKernelsIDs, - CKArgs>(problem); + return miopen::solver::FillValidKernelsIDs, + CKArgs>(problem); case SCALE: - return ::miopen::solver::FillValidKernelsIDs, - CKArgs>(problem); + return miopen::solver::FillValidKernelsIDs, + CKArgs>(problem); default: - return ::miopen::solver::FillValidKernelsIDs, - CKArgs>(problem); + return miopen::solver::FillValidKernelsIDs, + CKArgs>(problem); } } } // namespace @@ -418,7 +418,7 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::InitValidKernels( default: break; // Unsupported data types - valid_kernels remains empty } } -#endif // # MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL +#endif void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::HeuristicInit( const miopen::ExecutionContext& ctx, const ::miopen::conv::ProblemDescription& problem) @@ -839,6 +839,14 @@ ConvSolution ConvHipImplicitGemm3DGroupFwdXdlops::GetSolution( #endif } +#if !(MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL) +void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::InitValidKernels( + const ::miopen::conv::ProblemDescription&) +{ + // No-op stub for non-CK builds +} +#endif + } // namespace conv } // namespace solver } // namespace miopen diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp index 6f767ddf0133..c4ad6c252b79 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp @@ -372,7 +372,6 @@ bool ConvHipImplicitGemm3DGroupWrwXdlops::CheckCKApplicability( return IsCKApplicable, CKArgs>(problem); } } - void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::InitValidKernels( const ::miopen::conv::ProblemDescription& problem) { @@ -385,8 +384,8 @@ void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::InitValidKernels( default: break; // Unsupported data types - valid_kernels remains empty } } +#endif -#endif // # MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::HeuristicInit( const miopen::ExecutionContext& ctx, const ::miopen::conv::ProblemDescription& problem) { @@ -719,6 +718,14 @@ ConvSolution ConvHipImplicitGemm3DGroupWrwXdlops::GetSolution( #endif } +#if !(MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL) +void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::InitValidKernels( + const ::miopen::conv::ProblemDescription&) +{ + // No-op stub for non-CK builds +} +#endif + } // namespace conv } // namespace solver } // namespace miopen From 5547f192134eb0b8bcf891b87d1fed0c176127aa Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Mon, 13 Oct 2025 08:41:28 +0000 Subject: [PATCH 24/26] Slightly reorganised includes to make them more neat. Moved up stub implementation ( --- ...v_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp | 16 +++++++--------- ...v_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp | 16 +++++++--------- ...v_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp | 16 +++++++--------- 3 files changed, 21 insertions(+), 27 deletions(-) diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp index 98ebcbacfb5a..2f279737dfdf 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp @@ -38,6 +38,7 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS_AI #include #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL #include +#include #include #include #include @@ -47,7 +48,6 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS_AI #include #endif #endif -#include namespace miopen { namespace solver { @@ -419,6 +419,12 @@ void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::InitValidKernels( default: break; // Unsupported data types - valid_kernels remains empty } } +#else +void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::InitValidKernels( + const ::miopen::conv::ProblemDescription&) +{ + // No-op stub for non-CK builds +} #endif void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::HeuristicInit( @@ -688,14 +694,6 @@ ConvSolution ConvHipImplicitGemm3DGroupBwdXdlops::GetSolution( #endif } -#if !(MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL) -void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::InitValidKernels( - const ::miopen::conv::ProblemDescription&) -{ - // No-op stub for non-CK builds -} -#endif - } // namespace conv } // namespace solver } // namespace miopen diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index eef231a7abdd..90c35428b3b1 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -39,6 +39,7 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_AI #include #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL #include +#include #include #include #include "ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp" @@ -48,7 +49,6 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_AI #include #endif #endif -#include namespace miopen { namespace solver { @@ -418,6 +418,12 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::InitValidKernels( default: break; // Unsupported data types - valid_kernels remains empty } } +#else +void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::InitValidKernels( + const ::miopen::conv::ProblemDescription&) +{ + // No-op stub for non-CK builds +} #endif void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::HeuristicInit( @@ -839,14 +845,6 @@ ConvSolution ConvHipImplicitGemm3DGroupFwdXdlops::GetSolution( #endif } -#if !(MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL) -void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::InitValidKernels( - const ::miopen::conv::ProblemDescription&) -{ - // No-op stub for non-CK builds -} -#endif - } // namespace conv } // namespace solver } // namespace miopen diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp index c4ad6c252b79..1ada2e1eb582 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp @@ -38,13 +38,13 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_WRW_XDLOPS_AI #include #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL #include +#include #if MIOPEN_ENABLE_AI_KERNEL_TUNING #include #include #include #endif #endif -#include #include namespace miopen { @@ -384,6 +384,12 @@ void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::InitValidKernels( default: break; // Unsupported data types - valid_kernels remains empty } } +#else +void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::InitValidKernels( + const ::miopen::conv::ProblemDescription&) +{ + // No-op stub for non-CK builds +} #endif void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::HeuristicInit( @@ -718,14 +724,6 @@ ConvSolution ConvHipImplicitGemm3DGroupWrwXdlops::GetSolution( #endif } -#if !(MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL) -void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::InitValidKernels( - const ::miopen::conv::ProblemDescription&) -{ - // No-op stub for non-CK builds -} -#endif - } // namespace conv } // namespace solver } // namespace miopen From b3cccf4ceb0b68beda78345e33a61088acf29cb8 Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Mon, 13 Oct 2025 14:24:31 +0000 Subject: [PATCH 25/26] Revert "Slightly reorganised includes to make them more neat. Moved up stub implementation (" This reverts commit 5547f192134eb0b8bcf891b87d1fed0c176127aa. --- ...v_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp | 16 +++++++++------- ...v_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp | 16 +++++++++------- ...v_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp | 16 +++++++++------- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp index 2f279737dfdf..98ebcbacfb5a 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp @@ -38,7 +38,6 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS_AI #include #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL #include -#include #include #include #include @@ -48,6 +47,7 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_BWD_XDLOPS_AI #include #endif #endif +#include namespace miopen { namespace solver { @@ -419,12 +419,6 @@ void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::InitValidKernels( default: break; // Unsupported data types - valid_kernels remains empty } } -#else -void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::InitValidKernels( - const ::miopen::conv::ProblemDescription&) -{ - // No-op stub for non-CK builds -} #endif void PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::HeuristicInit( @@ -694,6 +688,14 @@ ConvSolution ConvHipImplicitGemm3DGroupBwdXdlops::GetSolution( #endif } +#if !(MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL) +void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupBwdXdlops::InitValidKernels( + const ::miopen::conv::ProblemDescription&) +{ + // No-op stub for non-CK builds +} +#endif + } // namespace conv } // namespace solver } // namespace miopen diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index 90c35428b3b1..eef231a7abdd 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -39,7 +39,6 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_AI #include #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL #include -#include #include #include #include "ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp" @@ -49,6 +48,7 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_FWD_XDLOPS_AI #include #endif #endif +#include namespace miopen { namespace solver { @@ -418,12 +418,6 @@ void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::InitValidKernels( default: break; // Unsupported data types - valid_kernels remains empty } } -#else -void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::InitValidKernels( - const ::miopen::conv::ProblemDescription&) -{ - // No-op stub for non-CK builds -} #endif void PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::HeuristicInit( @@ -845,6 +839,14 @@ ConvSolution ConvHipImplicitGemm3DGroupFwdXdlops::GetSolution( #endif } +#if !(MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL) +void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupFwdXdlops::InitValidKernels( + const ::miopen::conv::ProblemDescription&) +{ + // No-op stub for non-CK builds +} +#endif + } // namespace conv } // namespace solver } // namespace miopen diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp index 1ada2e1eb582..c4ad6c252b79 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp @@ -38,13 +38,13 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_3D_CONV_IMPLICIT_GEMM_HIP_WRW_XDLOPS_AI #include #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL #include -#include #if MIOPEN_ENABLE_AI_KERNEL_TUNING #include #include #include #endif #endif +#include #include namespace miopen { @@ -384,12 +384,6 @@ void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::InitValidKernels( default: break; // Unsupported data types - valid_kernels remains empty } } -#else -void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::InitValidKernels( - const ::miopen::conv::ProblemDescription&) -{ - // No-op stub for non-CK builds -} #endif void PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::HeuristicInit( @@ -724,6 +718,14 @@ ConvSolution ConvHipImplicitGemm3DGroupWrwXdlops::GetSolution( #endif } +#if !(MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL) +void miopen::solver::conv::PerformanceConfigHipImplicitGemm3DGroupWrwXdlops::InitValidKernels( + const ::miopen::conv::ProblemDescription&) +{ + // No-op stub for non-CK builds +} +#endif + } // namespace conv } // namespace solver } // namespace miopen From 721d9027c303fea6c5345c150d1694611a7163ea Mon Sep 17 00:00:00 2001 From: amd-bartgips Date: Wed, 15 Oct 2025 14:30:02 +0000 Subject: [PATCH 26/26] fixed clang-format --- ...v_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp | 16 ++++++++-------- ...v_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp | 16 ++++++++-------- ...v_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp | 16 ++++++++-------- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp index 98ebcbacfb5a..91bc0c9cf82d 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_bwd_xdlops.cpp @@ -185,21 +185,21 @@ struct CKArgs } filter_strides = {ProblemInterpreter::GetAdjustedConvolutionStrideD(problem), - ProblemInterpreter::GetAdjustedConvolutionStrideH(problem), - ProblemInterpreter::GetAdjustedConvolutionStrideW(problem)}; + ProblemInterpreter::GetAdjustedConvolutionStrideH(problem), + ProblemInterpreter::GetAdjustedConvolutionStrideW(problem)}; filter_dilations = {ProblemInterpreter::GetAdjustedConvolutionDilationD(problem), ProblemInterpreter::GetAdjustedConvolutionDilationH(problem), ProblemInterpreter::GetAdjustedConvolutionDilationW(problem)}; lPadding = {ProblemInterpreter::GetInputLeftPadD(problem), - ProblemInterpreter::GetInputLeftPadH(problem), - ProblemInterpreter::GetInputLeftPadW(problem)}; + ProblemInterpreter::GetInputLeftPadH(problem), + ProblemInterpreter::GetInputLeftPadW(problem)}; rPadding = {ProblemInterpreter::GetAdjustedInputRightPadD(problem), - ProblemInterpreter::GetAdjustedInputRightPadH(problem), - ProblemInterpreter::GetAdjustedInputRightPadW(problem)}; + ProblemInterpreter::GetAdjustedInputRightPadH(problem), + ProblemInterpreter::GetAdjustedInputRightPadW(problem)}; } - CKArgs(const CKArgs&) = default; - CKArgs(CKArgs&&) = default; + CKArgs(const CKArgs&) = default; + CKArgs(CKArgs&&) = default; CKArgs& operator=(const CKArgs&) = default; template diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp index eef231a7abdd..b720c9611d66 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_fwd_xdlops.cpp @@ -180,21 +180,21 @@ struct CKArgs } filter_strides = {ProblemInterpreter::GetAdjustedConvolutionStrideD(problem), - ProblemInterpreter::GetAdjustedConvolutionStrideH(problem), - ProblemInterpreter::GetAdjustedConvolutionStrideW(problem)}; + ProblemInterpreter::GetAdjustedConvolutionStrideH(problem), + ProblemInterpreter::GetAdjustedConvolutionStrideW(problem)}; filter_dilations = {ProblemInterpreter::GetAdjustedConvolutionDilationD(problem), ProblemInterpreter::GetAdjustedConvolutionDilationH(problem), ProblemInterpreter::GetAdjustedConvolutionDilationW(problem)}; lPadding = {ProblemInterpreter::GetInputLeftPadD(problem), - ProblemInterpreter::GetInputLeftPadH(problem), - ProblemInterpreter::GetInputLeftPadW(problem)}; + ProblemInterpreter::GetInputLeftPadH(problem), + ProblemInterpreter::GetInputLeftPadW(problem)}; rPadding = {ProblemInterpreter::GetAdjustedInputRightPadD(problem), - ProblemInterpreter::GetAdjustedInputRightPadH(problem), - ProblemInterpreter::GetAdjustedInputRightPadW(problem)}; + ProblemInterpreter::GetAdjustedInputRightPadH(problem), + ProblemInterpreter::GetAdjustedInputRightPadW(problem)}; } - CKArgs(const CKArgs&) = default; - CKArgs(CKArgs&&) noexcept = default; + CKArgs(const CKArgs&) = default; + CKArgs(CKArgs&&) noexcept = default; CKArgs& operator=(const CKArgs&) = default; template diff --git a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp index c4ad6c252b79..9384a573e8fe 100644 --- a/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp +++ b/projects/miopen/src/solver/conv/conv_hip_implicit_gemm_3d_grouped_wrw_xdlops.cpp @@ -115,20 +115,20 @@ struct CKArgs } filter_strides = {ProblemInterpreter::GetAdjustedConvolutionStrideD(problem), - ProblemInterpreter::GetAdjustedConvolutionStrideH(problem), - ProblemInterpreter::GetAdjustedConvolutionStrideW(problem)}; + ProblemInterpreter::GetAdjustedConvolutionStrideH(problem), + ProblemInterpreter::GetAdjustedConvolutionStrideW(problem)}; filter_dilations = {ProblemInterpreter::GetAdjustedConvolutionDilationD(problem), ProblemInterpreter::GetAdjustedConvolutionDilationH(problem), ProblemInterpreter::GetAdjustedConvolutionDilationW(problem)}; lPadding = {ProblemInterpreter::GetInputLeftPadD(problem), - ProblemInterpreter::GetInputLeftPadH(problem), - ProblemInterpreter::GetInputLeftPadW(problem)}; + ProblemInterpreter::GetInputLeftPadH(problem), + ProblemInterpreter::GetInputLeftPadW(problem)}; rPadding = {ProblemInterpreter::GetAdjustedInputRightPadD(problem), - ProblemInterpreter::GetAdjustedInputRightPadH(problem), - ProblemInterpreter::GetAdjustedInputRightPadW(problem)}; + ProblemInterpreter::GetAdjustedInputRightPadH(problem), + ProblemInterpreter::GetAdjustedInputRightPadW(problem)}; } - CKArgs(const CKArgs&) = default; - CKArgs(CKArgs&&) = default; + CKArgs(const CKArgs&) = default; + CKArgs(CKArgs&&) = default; CKArgs& operator=(const CKArgs&) = default; template