From 26b56b68c5c7bb996fd1d4c532579a32b21ea05b Mon Sep 17 00:00:00 2001 From: VENKATA NARRA Date: Wed, 29 Apr 2026 10:12:53 -0700 Subject: [PATCH] [AI Generated] BugFix: Scale kdump timeouts and crashkernel size for large memory VMs - Tiered crashkernel allocation: 4G for >8TB, 2G for >4TB, 1G for >1TB RAM - Tiered dump timeout: 4800s for >8TB, 3600s for >4TB, 2400s for >1TB RAM - Tiered reboot timeout: 1200s for >4TB, 900s for >1TB, 600s default - Fixes kdumpcrash_validate_on_cpu415 OSProvisioningTimeout on M416 VMs --- lisa/tools/kdump.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/lisa/tools/kdump.py b/lisa/tools/kdump.py index ae4b7931d5..578873180f 100644 --- a/lisa/tools/kdump.py +++ b/lisa/tools/kdump.py @@ -294,8 +294,16 @@ def calculate_crashkernel_size(self, total_memory: str) -> str: and float(total_memory.strip("M")) < 2048 ): crash_kernel = "256M" - elif "T" in total_memory and float(total_memory.strip("T")) > 1: - crash_kernel = "1G" + elif "T" in total_memory: + total_memory_tb = float(total_memory.strip("T")) + if total_memory_tb > 8: + crash_kernel = "4G" + elif total_memory_tb > 4: + crash_kernel = "2G" + elif total_memory_tb > 1: + crash_kernel = "1G" + else: + crash_kernel = "512M" else: crash_kernel = "512M" return crash_kernel @@ -854,8 +862,14 @@ def kdump_test( # change the dump path and increase the timeout duration kdump.config_resource_disk_dump_path(self._get_disk_dump_path()) self.timeout_of_dump_crash = 1200 - if "T" in total_memory and float(total_memory.strip("T")) > 6: - self.timeout_of_dump_crash = 2000 + if "T" in total_memory: + total_memory_tb = float(total_memory.strip("T")) + if total_memory_tb > 8: + self.timeout_of_dump_crash = 4800 + elif total_memory_tb > 4: + self.timeout_of_dump_crash = 3600 + elif total_memory_tb > 1: + self.timeout_of_dump_crash = 2400 kdump.config_crashkernel_memory(self.crash_kernel) kdump.enable_kdump_service() @@ -866,7 +880,15 @@ def kdump_test( self.node.execute(f"rm -rf {kdump.dump_path}/*", shell=True, sudo=True) # Reboot system to make kdump take effect - self.node.reboot(time_out=600) + # Large memory VMs (multi-TB) need more time for memory initialization + reboot_timeout = 600 + if "T" in total_memory: + total_memory_tb = float(total_memory.strip("T")) + if total_memory_tb > 4: + reboot_timeout = 1200 + elif total_memory_tb > 1: + reboot_timeout = 900 + self.node.reboot(time_out=reboot_timeout) # Confirm that the kernel dump mechanism is enabled kdump.check_crashkernel_loaded(self.crash_kernel)