From 4ede34845c93617fb34ca72a8a6dba31015a49ba Mon Sep 17 00:00:00 2001 From: Masashi Sode <39261814+MasashiSode@users.noreply.github.com> Date: Fri, 6 Mar 2026 11:46:53 -0500 Subject: [PATCH 1/5] Add GPU memory management in Solver class; implement method to release CuPy memory pools --- fullwave/solver/pml_builder.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/fullwave/solver/pml_builder.py b/fullwave/solver/pml_builder.py index 4466a24..99ad5d2 100644 --- a/fullwave/solver/pml_builder.py +++ b/fullwave/solver/pml_builder.py @@ -597,14 +597,17 @@ def _extend_arrays_gpu( ) return self._extend_arrays_multi_gpu(named_arrays, n_gpus) except Exception: - logger.warning("Multi-GPU extension failed. Falling back to sequential single-GPU.") + logger.warning( + "Multi-GPU extension failed. Falling back to sequential single-GPU.", + exc_info=True, + ) # Strategy 2: Sequential single-GPU (extend one, free, repeat) try: logger.info("Extending %d medium arrays sequentially on GPU 0.", len(named_arrays)) return self._extend_arrays_sequential_gpu(named_arrays) except Exception: - logger.warning("Single-GPU extension failed. Falling back to CPU.") + logger.warning("Single-GPU extension failed. Falling back to CPU.", exc_info=True) # Strategy 3: CPU logger.info("Extending %d medium arrays on CPU.", len(named_arrays)) @@ -669,13 +672,21 @@ def _extend_arrays_cpu( named_arrays: list[tuple[str, NDArray]], ) -> dict[str, NDArray]: """Extend arrays on CPU using ThreadPoolExecutor.""" + # Convert any CuPy arrays to numpy before CPU fallback + numpy_arrays = [] + for name, arr in named_arrays: + if hasattr(arr, "get"): + numpy_arrays.append((name, arr.get())) + else: + numpy_arrays.append((name, arr)) + orig_xp = self.xp self.xp = np try: with concurrent.futures.ThreadPoolExecutor() as executor: futures = { name: executor.submit(self._extend_map_for_pml, arr) - for name, arr in named_arrays + for name, arr in numpy_arrays } return {name: future.result() for name, future in futures.items()} finally: From 2af887fc0080dc4980720070e2690d25625710d2 Mon Sep 17 00:00:00 2001 From: Masashi Sode <39261814+MasashiSode@users.noreply.github.com> Date: Fri, 6 Mar 2026 12:47:16 -0500 Subject: [PATCH 2/5] Add method to move named arrays to CPU and free GPU memory pools --- fullwave/solver/pml_builder.py | 46 ++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/fullwave/solver/pml_builder.py b/fullwave/solver/pml_builder.py index 99ad5d2..c02043a 100644 --- a/fullwave/solver/pml_builder.py +++ b/fullwave/solver/pml_builder.py @@ -3,6 +3,7 @@ from __future__ import annotations import concurrent.futures +import gc import logging from collections import OrderedDict from dataclasses import dataclass, field @@ -563,6 +564,33 @@ def _extend_map_for_pml( return output + def _move_named_arrays_to_cpu( + self, + named_arrays: list[tuple[str, NDArray]], + attr_names: list[str] | None = None, + ) -> list[tuple[str, NDArray]]: + """Convert CuPy arrays to numpy and free GPU memory pools. + + Also replaces the corresponding ``medium_org`` attributes (given by + *attr_names*) so the original GPU arrays can be garbage-collected. + """ + import cupy as cp # noqa: PLC0415 + + numpy_arrays = [] + for name, arr in named_arrays: + if hasattr(arr, "get"): + arr_np = arr.get() + numpy_arrays.append((name, arr_np)) + # Update medium_org so the CuPy array can be freed + if attr_names and name in attr_names: + setattr(self.medium_org, name, arr_np) + else: + numpy_arrays.append((name, arr)) + gc.collect() + cp.get_default_memory_pool().free_all_blocks() + cp.get_default_pinned_memory_pool().free_all_blocks() + return numpy_arrays + def _extend_arrays_gpu( self, named_arrays: list[tuple[str, NDArray]], @@ -571,8 +599,8 @@ def _extend_arrays_gpu( Parameters ---------- - named_arrays : list of (name, numpy_array) pairs - Arrays to extend. Must be numpy arrays (CPU). + named_arrays : list of (name, numpy_array) or (name, cupy_array) pairs + Arrays to extend. Returns ------- @@ -585,6 +613,8 @@ def _extend_arrays_gpu( n_gpus = cp.cuda.runtime.getDeviceCount() logger.info("CUDA devices available: %d", n_gpus) + attr_names = [name for name, _ in named_arrays] + # Strategy 1: Multi-GPU parallel if n_gpus > 1: n_workers = min(n_gpus, len(named_arrays)) @@ -598,18 +628,14 @@ def _extend_arrays_gpu( return self._extend_arrays_multi_gpu(named_arrays, n_gpus) except Exception: logger.warning( - "Multi-GPU extension failed. Falling back to sequential single-GPU.", + "Multi-GPU extension failed. Falling back to CPU.", exc_info=True, ) - # Strategy 2: Sequential single-GPU (extend one, free, repeat) - try: - logger.info("Extending %d medium arrays sequentially on GPU 0.", len(named_arrays)) - return self._extend_arrays_sequential_gpu(named_arrays) - except Exception: - logger.warning("Single-GPU extension failed. Falling back to CPU.", exc_info=True) + # Move arrays to CPU and free GPU memory before CPU fallback. + # The original medium arrays may still occupy GPU memory. + named_arrays = self._move_named_arrays_to_cpu(named_arrays, attr_names) - # Strategy 3: CPU logger.info("Extending %d medium arrays on CPU.", len(named_arrays)) return self._extend_arrays_cpu(named_arrays) From 42a8b5c533e9c94d6ad222550d6b2d0d5718414a Mon Sep 17 00:00:00 2001 From: Masashi Sode <39261814+MasashiSode@users.noreply.github.com> Date: Fri, 6 Mar 2026 12:47:26 -0500 Subject: [PATCH 3/5] =?UTF-8?q?Bump=20version:=201.2.6-dev8=20=E2=86=92=20?= =?UTF-8?q?1.2.6-dev9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .bumpversion.toml | 2 +- fullwave/__init__.py | 2 +- pyproject.toml | 2 +- uv.lock | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.bumpversion.toml b/.bumpversion.toml index 2905593..071ac94 100644 --- a/.bumpversion.toml +++ b/.bumpversion.toml @@ -1,5 +1,5 @@ [tool.bumpversion] -current_version = "1.2.6-dev8" +current_version = "1.2.6-dev9" parse = """(?x) (?P0|[1-9]\\d*)\\. (?P0|[1-9]\\d*)\\. diff --git a/fullwave/__init__.py b/fullwave/__init__.py index 03caafd..3a32838 100644 --- a/fullwave/__init__.py +++ b/fullwave/__init__.py @@ -60,7 +60,7 @@ __version__ = version("fullwave") except PackageNotFoundError: # Update via bump-my-version, not manually - __version__ = "1.2.6-dev8" + __version__ = "1.2.6-dev9" VERSION = __version__ # for convenience logger.info("Fullwave version: %s", __version__) diff --git a/pyproject.toml b/pyproject.toml index 9365b50..26cc9d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "fullwave25" -version = "1.2.6-dev8" # Update via bump-my-version, not manually +version = "1.2.6-dev9" # Update via bump-my-version, not manually description = "Fullwave 2.5: Ultrasound wave propagation simulation with heterogeneous power law attenuation modelling capabilities" readme = "README.md" requires-python = ">=3.10" diff --git a/uv.lock b/uv.lock index fca99ac..c874411 100644 --- a/uv.lock +++ b/uv.lock @@ -735,7 +735,7 @@ wheels = [ [[package]] name = "fullwave25" -version = "1.2.6.dev8" +version = "1.2.6.dev9" source = { editable = "." } dependencies = [ { name = "matplotlib" }, From ca4f354b9d1fb8de9b649de9dcb297cba99c32a7 Mon Sep 17 00:00:00 2001 From: Masashi Sode <39261814+MasashiSode@users.noreply.github.com> Date: Sun, 29 Mar 2026 23:55:48 -0400 Subject: [PATCH 4/5] Add phase correction for fractional-sample delays in gaussian_modulated_sinusoidal_signal and enhance tests for peak polarity preservation --- fullwave/utils/pulse.py | 17 ++++++++++--- tests/test_pulse.py | 56 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/fullwave/utils/pulse.py b/fullwave/utils/pulse.py index ae09d28..85f5237 100644 --- a/fullwave/utils/pulse.py +++ b/fullwave/utils/pulse.py @@ -61,6 +61,7 @@ def gaussian_modulated_sinusoidal_signal( dt = duration / nt t_offset = ncycles / f0 + delay_sec + phase_correction = 0.0 if i_layer is not None: if dt_for_layer_delay is None: error_msg = "dt_for_layer_delay must be provided if i_layer is provided" @@ -68,7 +69,17 @@ def gaussian_modulated_sinusoidal_signal( if cfl_for_layer_delay is None: error_msg = "cfl_for_layer_delay must be provided if i_layer is provided" raise ValueError(error_msg) - t_offset += (dt_for_layer_delay / cfl_for_layer_delay) * i_layer + layer_delay_sec = (dt_for_layer_delay / cfl_for_layer_delay) * i_layer + t_offset += layer_delay_sec + # Correct carrier phase for fractional-sample delays. + # A continuous time shift of a fractional number of samples changes + # the carrier phase at each discrete sample point, causing sign + # inversion and wrong peak positions. Remove the fractional phase + # offset so the carrier stays on the same discrete phase grid as + # the base (i_layer=0) signal. + delay_samples = layer_delay_sec / dt + fractional_samples = delay_samples - round(delay_samples) + phase_correction = 2.0 * np.pi * f0 * fractional_samples * dt t = np.arange(nt, dtype=dtype) * dt - t_offset @@ -87,5 +98,5 @@ def gaussian_modulated_sinusoidal_signal( else: env = np.exp(-(a_sq**drop_off)) - # Compute final signal - return env * np.sin(w_t) * p0 + # Compute final signal with phase-corrected carrier + return env * np.sin(w_t + phase_correction) * p0 diff --git a/tests/test_pulse.py b/tests/test_pulse.py index 32d0f4a..de1b5ed 100644 --- a/tests/test_pulse.py +++ b/tests/test_pulse.py @@ -76,6 +76,62 @@ def test_float32_dtype(base_params): assert y.dtype == np.float32 +def test_fractional_layer_delay_preserves_phase(base_params): + """Fractional-sample layer delays must preserve peak polarity. + + Previously, fractional delays caused carrier phase errors that inverted + the peak sign for odd layers (Finding 19 in cross-physics experiment). + """ + # cfl=0.3 => dt_layer/cfl = 3.33e-8 s per i_layer + # dt_sim = 2.5e-9 s => 13.33 sim samples per i_layer (fractional) + dt_layer_frac = 1e-8 + cfl_frac = 0.3 + + y_base = gaussian_modulated_sinusoidal_signal(**base_params) + base_peak_sign = np.sign(y_base[np.argmax(np.abs(y_base))]) + + for i in range(1, 6): + y_layer = gaussian_modulated_sinusoidal_signal( + **base_params, + i_layer=i, + dt_for_layer_delay=dt_layer_frac, + cfl_for_layer_delay=cfl_frac, + ) + peak_idx = np.argmax(np.abs(y_layer)) + layer_peak_sign = np.sign(y_layer[peak_idx]) + assert layer_peak_sign == base_peak_sign, ( + f"i_layer={i}: peak sign {layer_peak_sign} != base sign {base_peak_sign}" + ) + + +def test_integer_layer_delay_unchanged(base_params): + """Integer-sample layer delays should produce NCC ~ 1.0 vs manual shift.""" + dt_layer = 1e-8 + cfl_layer = 0.4 + # delay per i_layer = 2.5e-8 s = 10 sim samples (integer) + dt_sim = base_params["duration"] / base_params["nt"] + delay_per_layer_samples = (dt_layer / cfl_layer) / dt_sim + assert delay_per_layer_samples == 10.0 # confirm integer + + y_base = gaussian_modulated_sinusoidal_signal(**base_params) + + for i_layer in [1, 2, 4]: + y_layer = gaussian_modulated_sinusoidal_signal( + **base_params, + i_layer=i_layer, + dt_for_layer_delay=dt_layer, + cfl_for_layer_delay=cfl_layer, + ) + # Manual integer shift + shift = int(round(delay_per_layer_samples * i_layer)) + y_manual = np.zeros_like(y_base) + y_manual[shift:] = y_base[: len(y_base) - shift] + + # NCC should be very high + ncc = np.dot(y_layer, y_manual) / (np.linalg.norm(y_layer) * np.linalg.norm(y_manual)) + assert ncc > 0.999, f"i_layer={i_layer}: NCC={ncc:.6f}, expected > 0.999" + + def test_performance(base_params): """Ensure the function completes in well under 1 second.""" start = time.perf_counter() From f221473fbaf6961ca88991a5346e102d994aaee0 Mon Sep 17 00:00:00 2001 From: Masashi Sode <39261814+MasashiSode@users.noreply.github.com> Date: Mon, 30 Mar 2026 02:09:59 -0400 Subject: [PATCH 5/5] Fix fractional layer delay to preserve peak polarity by matching manual integer shifts --- tests/test_pulse.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tests/test_pulse.py b/tests/test_pulse.py index de1b5ed..d0eccf8 100644 --- a/tests/test_pulse.py +++ b/tests/test_pulse.py @@ -77,18 +77,21 @@ def test_float32_dtype(base_params): def test_fractional_layer_delay_preserves_phase(base_params): - """Fractional-sample layer delays must preserve peak polarity. + """Fractional-sample layer delays must match a manual integer shift. Previously, fractional delays caused carrier phase errors that inverted the peak sign for odd layers (Finding 19 in cross-physics experiment). + The fix corrects carrier phase so that the layered signal matches an + integer-sample-shifted version of the base (high NCC, same peak sign). """ # cfl=0.3 => dt_layer/cfl = 3.33e-8 s per i_layer # dt_sim = 2.5e-9 s => 13.33 sim samples per i_layer (fractional) dt_layer_frac = 1e-8 cfl_frac = 0.3 + dt_sim = base_params["duration"] / base_params["nt"] + delay_per_layer_sec = dt_layer_frac / cfl_frac y_base = gaussian_modulated_sinusoidal_signal(**base_params) - base_peak_sign = np.sign(y_base[np.argmax(np.abs(y_base))]) for i in range(1, 6): y_layer = gaussian_modulated_sinusoidal_signal( @@ -97,11 +100,14 @@ def test_fractional_layer_delay_preserves_phase(base_params): dt_for_layer_delay=dt_layer_frac, cfl_for_layer_delay=cfl_frac, ) - peak_idx = np.argmax(np.abs(y_layer)) - layer_peak_sign = np.sign(y_layer[peak_idx]) - assert layer_peak_sign == base_peak_sign, ( - f"i_layer={i}: peak sign {layer_peak_sign} != base sign {base_peak_sign}" - ) + # Manual integer shift of base signal + shift = round(delay_per_layer_sec * i / dt_sim) + y_manual = np.zeros_like(y_base) + y_manual[shift:] = y_base[: len(y_base) - shift] + + # NCC must be high (signals should be nearly identical) + ncc = np.dot(y_layer, y_manual) / (np.linalg.norm(y_layer) * np.linalg.norm(y_manual)) + assert ncc > 0.999, f"i_layer={i}: NCC={ncc:.6f}, expected > 0.999" def test_integer_layer_delay_unchanged(base_params):