From 87dfcdbef9c264cf7ff294cf4e04817cc636461b Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Tue, 17 Feb 2026 00:08:15 -0500 Subject: [PATCH 01/27] add pyfftw sdp and check for fftw/pyfftw --- stumpy/sdp.py | 189 ++++++++++++++++++++++++++++++++++++++++++++++ test.sh | 50 +++++++++--- tests/test_sdp.py | 25 ++++++ 3 files changed, 255 insertions(+), 9 deletions(-) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index bd1d8fea7..12ef280d5 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -6,6 +6,13 @@ from . import config +try: # pragma: no cover + import pyfftw + + FFTW_IS_AVAILABLE = True +except ImportError: # pragma: no cover + FFTW_IS_AVAILABLE = False + @njit(fastmath=config.STUMPY_FASTMATH_TRUE) def _njit_sliding_dot_product(Q, T): @@ -109,6 +116,188 @@ def _pocketfft_sliding_dot_product(Q, T): return c2r(False, np.multiply(fft_2d[0], fft_2d[1]), n=next_fast_n)[m - 1 : n] +class _PYFFTW_SLIDING_DOT_PRODUCT: + """ + A class to compute the sliding dot product using FFTW via pyfftw. + + This class uses FFTW (via pyfftw) to efficiently compute the sliding dot product + between a query sequence Q and a time series T. It preallocates arrays and caches + FFTW objects to optimize repeated computations with similar-sized inputs. + + Parameters + ---------- + max_n : int, default=2**20 + Maximum length to preallocate arrays for. This will be the size of the + real-valued array. A complex-valued array of size `1 + (max_n // 2)` + will also be preallocated. If inputs exceed this size, arrays will be + reallocated to accommodate larger sizes. + + Attributes + ---------- + real_arr : pyfftw.empty_aligned + Preallocated real-valued array for FFTW computations. + + complex_arr : pyfftw.empty_aligned + Preallocated complex-valued array for FFTW computations. + + rfft_objects : dict + Cache of FFTW forward transform objects, keyed by + (next_fast_n, n_threads, planning_flag). + + irfft_objects : dict + Cache of FFTW inverse transform objects, keyed by + (next_fast_n, n_threads, planning_flag). + + Notes + ----- + The class maintains internal caches of FFTW objects to avoid redundant planning + operations when called multiple times with similar-sized inputs and parameters. + + Examples + -------- + >>> sdp_obj = _PYFFTW_SLIDING_DOT_PRODUCT(max_n=1000) + >>> Q = np.array([1, 2, 3]) + >>> T = np.array([4, 5, 6, 7, 8]) + >>> result = sdp_obj(Q, T) + + References + ---------- + `FFTW documentation `__ + + `pyfftw documentation `__ + """ + + def __init__(self, max_n=2**20): + """ + Initialize the `_PYFFTW_SLIDING_DOT_PRODUCT` object, which can be called + to compute the sliding dot product using FFTW via pyfftw. + + Parameters + ---------- + max_n : int, default=2**20 + Maximum length to preallocate arrays for. This will be the size of the + real-valued array. A complex-valued array of size `1 + (max_n // 2)` + will also be preallocated. + + Returns + ------- + None + """ + # Preallocate arrays + self.real_arr = pyfftw.empty_aligned(max_n, dtype="float64") + self.complex_arr = pyfftw.empty_aligned(1 + (max_n // 2), dtype="complex128") + + # Store FFTW objects, keyed by (next_fast_n, n_threads, planning_flag) + self.rfft_objects = {} + self.irfft_objects = {} + + def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): + """ + Compute the sliding dot product between `Q` and `T` using FFTW via pyfftw, + and cache FFTW objects if not already cached. + + Parameters + ---------- + Q : numpy.ndarray + Query array or subsequence. + + T : numpy.ndarray + Time series or sequence. + + n_threads : int, default=1 + Number of threads to use for FFTW computations. + + planning_flag : str, default="FFTW_ESTIMATE" + The planning flag that will be used in FFTW for planning. + See pyfftw documentation for details. Current options, ordered + ascendingly by the level of aggressiveness in planning, are: + "FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", and "FFTW_EXHAUSTIVE". + The more aggressive the planning, the longer the planning time, but + the faster the execution time. + + Returns + ------- + out : numpy.ndarray + Sliding dot product between `Q` and `T`. + + Notes + ----- + The planning_flag is defaulted to "FFTW_ESTIMATE" to be aligned with + MATLAB's FFTW usage (as of version R2025b) + See: https://www.mathworks.com/help/matlab/ref/fftw.html + + This implementation is inspired by the answer on StackOverflow: + https://stackoverflow.com/a/30615425/2955541 + """ + m = Q.shape[0] + n = T.shape[0] + next_fast_n = pyfftw.next_fast_len(n) + + # Update preallocated arrays if needed + if next_fast_n > len(self.real_arr): + self.real_arr = pyfftw.empty_aligned(next_fast_n, dtype="float64") + self.complex_arr = pyfftw.empty_aligned( + 1 + (next_fast_n // 2), dtype="complex128" + ) + + real_arr = self.real_arr[:next_fast_n] + complex_arr = self.complex_arr[: 1 + (next_fast_n // 2)] + + # Get or create FFTW objects + key = (next_fast_n, n_threads, planning_flag) + + rfft_obj = self.rfft_objects.get(key, None) + if rfft_obj is None: + rfft_obj = pyfftw.FFTW( + input_array=real_arr, + output_array=complex_arr, + direction="FFTW_FORWARD", + flags=(planning_flag,), + threads=n_threads, + ) + self.rfft_objects[key] = rfft_obj + else: + rfft_obj.update_arrays(real_arr, complex_arr) + + irfft_obj = self.irfft_objects.get(key, None) + if irfft_obj is None: + irfft_obj = pyfftw.FFTW( + input_array=complex_arr, + output_array=real_arr, + direction="FFTW_BACKWARD", + flags=(planning_flag, "FFTW_DESTROY_INPUT"), + threads=n_threads, + ) + self.irfft_objects[key] = irfft_obj + else: + irfft_obj.update_arrays(complex_arr, real_arr) + + # RFFT(T) + real_arr[:n] = T + real_arr[n:] = 0.0 + rfft_obj.execute() # output is in complex_arr + complex_arr_T = complex_arr.copy() + + # RFFT(Q) + # Scale by 1/next_fast_n to account for + # FFTW's unnormalized inverse FFT via execute() + np.multiply(Q[::-1], 1.0 / next_fast_n, out=real_arr[:m]) + real_arr[m:] = 0.0 + rfft_obj.execute() # output is in complex_arr + + # RFFT(T) * RFFT(Q) + np.multiply(complex_arr, complex_arr_T, out=complex_arr) + + # IRFFT (input is in complex_arr) + irfft_obj.execute() # output is in real_arr + + return real_arr[m - 1 : n] + + +if FFTW_IS_AVAILABLE: + _pyfftw_sliding_dot_product = _PYFFTW_SLIDING_DOT_PRODUCT(max_n=2**20) + + def _sliding_dot_product(Q, T): """ Compute the sliding dot product between `Q` and `T` diff --git a/test.sh b/test.sh index 194288574..fc80e76c3 100755 --- a/test.sh +++ b/test.sh @@ -154,27 +154,58 @@ gen_ray_coveragerc() # Generate a .coveragerc_override file that excludes Ray functions and tests gen_coveragerc_boilerplate echo " def .*_ray_*" >> .coveragerc_override - echo " def ,*_ray\(*" >> .coveragerc_override + echo " def .*_ray\(*" >> .coveragerc_override echo " def ray_.*" >> .coveragerc_override echo " def test_.*_ray*" >> .coveragerc_override } -set_ray_coveragerc() +check_fftw_pyfftw() { - # If `ray` command is not found then generate a .coveragerc_override file - if ! command -v ray &> /dev/null + if ! command -v fftw-wisdom &> /dev/null \ + || ! python -c "import pyfftw" &> /dev/null; then - echo "Ray Not Installed" + echo "FFTW and/or pyFFTW Not Installed" + else + echo "FFTW and pyFFTW Installed" + fi +} + +gen_pyfftw_coveragerc() +{ + gen_coveragerc_boilerplate + echo " class .*PYFFTW*" >> .coveragerc_override + echo " def test_.*pyfftw*" >> .coveragerc_override +} + +set_coveragerc() +{ + fcoveragerc="" + + if ! command -v ray &> /dev/null; + then + echo "Ray not installed" gen_ray_coveragerc - fcoveragerc="--rcfile=.coveragerc_override" else - echo "Ray Installed" + echo "Ray installed" + fi + + if ! command -v fftw-wisdom &> /dev/null \ + || ! python -c "import pyfftw" &> /dev/null; + then + echo "FFTW and/or pyFFTW not Installed" + gen_pyfftw_coveragerc + else + echo "FFTW and pyFFTW Installed" + fi + + if [ -f .coveragerc_override ]; then + fcoveragerc="--rcfile=.coveragerc_override" fi } show_coverage_report() { - set_ray_coveragerc + set_coveragerc coverage report --show-missing --fail-under=100 --skip-covered --omit=fastmath.py,docstring.py,versions.py $fcoveragerc check_errs $? } @@ -361,6 +392,7 @@ check_print check_pkg_imports check_naive check_ray +check_fftw_pyfftw if [[ -z $NUMBA_DISABLE_JIT || $NUMBA_DISABLE_JIT -eq 0 ]]; then @@ -405,4 +437,4 @@ else test_coverage fi -clean_up +clean_up \ No newline at end of file diff --git a/tests/test_sdp.py b/tests/test_sdp.py index b8d98eeb9..6537fefda 100644 --- a/tests/test_sdp.py +++ b/tests/test_sdp.py @@ -74,6 +74,9 @@ def get_sdp_function_names(): if func_name.endswith("sliding_dot_product"): out.append(func_name) + if sdp.FFTW_IS_AVAILABLE: + out.append("_pyfftw_sliding_dot_product") + return out @@ -152,3 +155,25 @@ def test_sdp_power2(): raise e return + + +def test_pyfftw_sdp_max_n(): + if not sdp.FFTW_IS_AVAILABLE: # pragma: no cover + pytest.skip("Skipping Test pyFFTW Not Installed") + + # When `len(T)` larger than `max_n` in pyfftw_sdp, + # the internal preallocated arrays should be resized. + # This test checks that functionality. + max_n = 2**10 + sdp_func = sdp._PYFFTW_SLIDING_DOT_PRODUCT(max_n) + + # len(T) > max_n to trigger array resizing + T = np.random.rand(max_n + 1) + Q = np.random.rand(2**8) + + comp = sdp_func(Q, T) + ref = naive.rolling_window_dot_product(Q, T) + + np.testing.assert_allclose(comp, ref) + + return From ae9050705a5be4bed2e6caf1e53bd9525482ad50 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Tue, 17 Feb 2026 00:35:57 -0500 Subject: [PATCH 02/27] fixed coverage --- stumpy/sdp.py | 2 +- tests/test_sdp.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index 12ef280d5..6db96b22d 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -294,7 +294,7 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): return real_arr[m - 1 : n] -if FFTW_IS_AVAILABLE: +if FFTW_IS_AVAILABLE: # pragma: no cover _pyfftw_sliding_dot_product = _PYFFTW_SLIDING_DOT_PRODUCT(max_n=2**20) diff --git a/tests/test_sdp.py b/tests/test_sdp.py index 6537fefda..90abb3528 100644 --- a/tests/test_sdp.py +++ b/tests/test_sdp.py @@ -74,7 +74,7 @@ def get_sdp_function_names(): if func_name.endswith("sliding_dot_product"): out.append(func_name) - if sdp.FFTW_IS_AVAILABLE: + if sdp.FFTW_IS_AVAILABLE: # pragma: no cover out.append("_pyfftw_sliding_dot_product") return out From 6051ab424dd8575dc54c02f787bc01ebfb3d0b35 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Thu, 19 Feb 2026 22:47:31 -0500 Subject: [PATCH 03/27] addressed comments --- stumpy/sdp.py | 52 +++++++++++++++++++++++------------------------ tests/test_sdp.py | 4 ++-- 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index 6db96b22d..58c1af1f1 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -9,9 +9,9 @@ try: # pragma: no cover import pyfftw - FFTW_IS_AVAILABLE = True + PYFFTW_IS_AVAILABLE = True except ImportError: # pragma: no cover - FFTW_IS_AVAILABLE = False + PYFFTW_IS_AVAILABLE = False @njit(fastmath=config.STUMPY_FASTMATH_TRUE) @@ -121,7 +121,7 @@ class _PYFFTW_SLIDING_DOT_PRODUCT: A class to compute the sliding dot product using FFTW via pyfftw. This class uses FFTW (via pyfftw) to efficiently compute the sliding dot product - between a query sequence Q and a time series T. It preallocates arrays and caches + between a query sequence, Q, and a time series, T. It preallocates arrays and caches FFTW objects to optimize repeated computations with similar-sized inputs. Parameters @@ -141,12 +141,12 @@ class _PYFFTW_SLIDING_DOT_PRODUCT: Preallocated complex-valued array for FFTW computations. rfft_objects : dict - Cache of FFTW forward transform objects, keyed by - (next_fast_n, n_threads, planning_flag). + Cache of FFTW forward transform objects with + (next_fast_n, n_threads, planning_flag) as lookup keys. irfft_objects : dict - Cache of FFTW inverse transform objects, keyed by - (next_fast_n, n_threads, planning_flag). + Cache of FFTW inverse transform objects with + (next_fast_n, n_threads, planning_flag) as lookup keys. Notes ----- @@ -247,7 +247,9 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): key = (next_fast_n, n_threads, planning_flag) rfft_obj = self.rfft_objects.get(key, None) - if rfft_obj is None: + irfft_obj = self.irfft_objects.get(key, None) + + if rfft_obj is None or irfft_obj is None: rfft_obj = pyfftw.FFTW( input_array=real_arr, output_array=complex_arr, @@ -255,12 +257,6 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): flags=(planning_flag,), threads=n_threads, ) - self.rfft_objects[key] = rfft_obj - else: - rfft_obj.update_arrays(real_arr, complex_arr) - - irfft_obj = self.irfft_objects.get(key, None) - if irfft_obj is None: irfft_obj = pyfftw.FFTW( input_array=complex_arr, output_array=real_arr, @@ -268,33 +264,35 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): flags=(planning_flag, "FFTW_DESTROY_INPUT"), threads=n_threads, ) + self.rfft_objects[key] = rfft_obj self.irfft_objects[key] = irfft_obj else: + rfft_obj.update_arrays(real_arr, complex_arr) irfft_obj.update_arrays(complex_arr, real_arr) - # RFFT(T) + # Compute RFFT of T real_arr[:n] = T real_arr[n:] = 0.0 - rfft_obj.execute() # output is in complex_arr - complex_arr_T = complex_arr.copy() + rfft_obj.execute() # output is stored in complex_arr - # RFFT(Q) - # Scale by 1/next_fast_n to account for - # FFTW's unnormalized inverse FFT via execute() + # need to make a copy since the array will be + # overwritten later during the RFFT(Q) step + rfft_of_T = complex_arr.copy() + + # Compute RFFT of Q (reversed and scaled by 1/next_fast_n) np.multiply(Q[::-1], 1.0 / next_fast_n, out=real_arr[:m]) real_arr[m:] = 0.0 - rfft_obj.execute() # output is in complex_arr - - # RFFT(T) * RFFT(Q) - np.multiply(complex_arr, complex_arr_T, out=complex_arr) + rfft_obj.execute() # output is stored in complex_arr + rfft_of_Q = complex_arr - # IRFFT (input is in complex_arr) - irfft_obj.execute() # output is in real_arr + # Compute IRFFT of the element-wise product of the RFFTs + np.multiply(rfft_of_Q, rfft_of_T, out=complex_arr) + irfft_obj.execute() # output is stored in real_arr return real_arr[m - 1 : n] -if FFTW_IS_AVAILABLE: # pragma: no cover +if PYFFTW_IS_AVAILABLE: # pragma: no cover _pyfftw_sliding_dot_product = _PYFFTW_SLIDING_DOT_PRODUCT(max_n=2**20) diff --git a/tests/test_sdp.py b/tests/test_sdp.py index 90abb3528..3ec26971f 100644 --- a/tests/test_sdp.py +++ b/tests/test_sdp.py @@ -74,7 +74,7 @@ def get_sdp_function_names(): if func_name.endswith("sliding_dot_product"): out.append(func_name) - if sdp.FFTW_IS_AVAILABLE: # pragma: no cover + if sdp.PYFFTW_IS_AVAILABLE: # pragma: no cover out.append("_pyfftw_sliding_dot_product") return out @@ -158,7 +158,7 @@ def test_sdp_power2(): def test_pyfftw_sdp_max_n(): - if not sdp.FFTW_IS_AVAILABLE: # pragma: no cover + if not sdp.PYFFTW_IS_AVAILABLE: # pragma: no cover pytest.skip("Skipping Test pyFFTW Not Installed") # When `len(T)` larger than `max_n` in pyfftw_sdp, From 0a3427e83c39a4f7dbc9b3036dfb82105297940a Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Thu, 19 Feb 2026 23:07:35 -0500 Subject: [PATCH 04/27] improved readability --- stumpy/sdp.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index 58c1af1f1..11e0bb332 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -267,29 +267,34 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): self.rfft_objects[key] = rfft_obj self.irfft_objects[key] = irfft_obj else: + # Update the input and output arrays of the cached FFTW objects + # in case their original input and output arrays were reallocated + # in a previous call rfft_obj.update_arrays(real_arr, complex_arr) irfft_obj.update_arrays(complex_arr, real_arr) # Compute RFFT of T real_arr[:n] = T real_arr[n:] = 0.0 - rfft_obj.execute() # output is stored in complex_arr - - # need to make a copy since the array will be - # overwritten later during the RFFT(Q) step - rfft_of_T = complex_arr.copy() - - # Compute RFFT of Q (reversed and scaled by 1/next_fast_n) + rfft_obj.execute() + rfft_of_T = rfft_obj.output_array.copy() + # output array is stored in complex_arr, so make a copy + # to avoid losing it when it is overwritten when computing + # the RFFT of Q + + # Compute RFFT of Q (reversed, scaled, and zero-padded) + # Note: scaling is needed since the thin wrapper `execute` + # is called which does not perform normalization np.multiply(Q[::-1], 1.0 / next_fast_n, out=real_arr[:m]) real_arr[m:] = 0.0 - rfft_obj.execute() # output is stored in complex_arr - rfft_of_Q = complex_arr + rfft_obj.execute() + rfft_of_Q = rfft_obj.output_array - # Compute IRFFT of the element-wise product of the RFFTs - np.multiply(rfft_of_Q, rfft_of_T, out=complex_arr) - irfft_obj.execute() # output is stored in real_arr + # Convert back to time domain by taking the inverse RFFT + np.multiply(rfft_of_T, rfft_of_Q, out=irfft_obj.input_array) + irfft_obj.execute() - return real_arr[m - 1 : n] + return irfft_obj.output_array[m - 1 : n] if PYFFTW_IS_AVAILABLE: # pragma: no cover From 58d99e21f24168c79059f7528f256b8a2c403d19 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Thu, 19 Feb 2026 23:14:31 -0500 Subject: [PATCH 05/27] improved docstring --- stumpy/sdp.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index 11e0bb332..91978810b 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -148,10 +148,19 @@ class _PYFFTW_SLIDING_DOT_PRODUCT: Cache of FFTW inverse transform objects with (next_fast_n, n_threads, planning_flag) as lookup keys. + Methods + ------- + __call__(Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE") + Compute the sliding dot product between `Q` and `T` using FFTW + via pyfftw, and cache FFTW objects if not already cached. + Notes ----- The class maintains internal caches of FFTW objects to avoid redundant planning operations when called multiple times with similar-sized inputs and parameters. + When `planning_flag=="FFTW_ESTIMATE"`, there will be no planning operation. + However, caching FFTW objects is still beneficial as the overhead of creating + those objects can be avoided in subsequent calls. Examples -------- From 0e82fbfd63aca1d75f8e57bdbc2f184fc3330c77 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 20 Feb 2026 10:00:25 -0500 Subject: [PATCH 06/27] add more comments --- stumpy/sdp.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index 91978810b..73bfe7385 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -282,28 +282,33 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): rfft_obj.update_arrays(real_arr, complex_arr) irfft_obj.update_arrays(complex_arr, real_arr) - # Compute RFFT of T - real_arr[:n] = T - real_arr[n:] = 0.0 + # Compute the circular convolution between T and Q[::-1] + # using FFT, and then take the relevant slice of the output + + # Step 1 + # Compute RFFT of T (zero-padded) + # Must make a copy of output to avoid losing it when the array is + # overwritten when computing the RFFT of Q in the next step + rfft_obj.input_array[:n] = T + rfft_obj.input_array[n:] = 0.0 rfft_obj.execute() rfft_of_T = rfft_obj.output_array.copy() - # output array is stored in complex_arr, so make a copy - # to avoid losing it when it is overwritten when computing - # the RFFT of Q + # Step 2 # Compute RFFT of Q (reversed, scaled, and zero-padded) - # Note: scaling is needed since the thin wrapper `execute` - # is called which does not perform normalization - np.multiply(Q[::-1], 1.0 / next_fast_n, out=real_arr[:m]) - real_arr[m:] = 0.0 + # Scaling is required because the thin wrapper `execute` + # that will be called below does not perform normalization + np.multiply(Q[::-1], 1.0 / next_fast_n, out=rfft_obj.input_array[:m]) + rfft_obj.input_array[m:] = 0.0 rfft_obj.execute() rfft_of_Q = rfft_obj.output_array + # Step 3 # Convert back to time domain by taking the inverse RFFT np.multiply(rfft_of_T, rfft_of_Q, out=irfft_obj.input_array) irfft_obj.execute() - return irfft_obj.output_array[m - 1 : n] + return irfft_obj.output_array[m - 1 : n] # valid portion if PYFFTW_IS_AVAILABLE: # pragma: no cover From db09a611a43ab3273f0ff668a94680cad7ad86ac Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 20 Feb 2026 10:01:00 -0500 Subject: [PATCH 07/27] minor change --- stumpy/sdp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index 73bfe7385..03ef94f03 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -282,7 +282,7 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): rfft_obj.update_arrays(real_arr, complex_arr) irfft_obj.update_arrays(complex_arr, real_arr) - # Compute the circular convolution between T and Q[::-1] + # Compute the (circular) convolution between T and Q[::-1] # using FFT, and then take the relevant slice of the output # Step 1 From 73d8229f7269b42630038abf41a2dc7347d7539a Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 20 Feb 2026 11:00:53 -0500 Subject: [PATCH 08/27] improved comment --- stumpy/sdp.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index 03ef94f03..0ac6a7dbc 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -282,8 +282,9 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): rfft_obj.update_arrays(real_arr, complex_arr) irfft_obj.update_arrays(complex_arr, real_arr) - # Compute the (circular) convolution between T and Q[::-1] - # using FFT, and then take the relevant slice of the output + # Compute the (circular) convolution between T and Q[::-1], + # each zero-padded to length `next_fast_n` by performing + # the following three steps: # Step 1 # Compute RFFT of T (zero-padded) From 61d83519c1571c8da08c75755ca0f3d19d177fa7 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 20 Feb 2026 11:01:47 -0500 Subject: [PATCH 09/27] fixed flake8 --- stumpy/sdp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index 0ac6a7dbc..3723b6d53 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -282,7 +282,7 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): rfft_obj.update_arrays(real_arr, complex_arr) irfft_obj.update_arrays(complex_arr, real_arr) - # Compute the (circular) convolution between T and Q[::-1], + # Compute the (circular) convolution between T and Q[::-1], # each zero-padded to length `next_fast_n` by performing # the following three steps: From 96b68b531bb3e3b36440e82c6d9857a3166960b6 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 20 Feb 2026 11:17:32 -0500 Subject: [PATCH 10/27] minor change --- stumpy/sdp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index 3723b6d53..a809bada9 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -293,7 +293,7 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): rfft_obj.input_array[:n] = T rfft_obj.input_array[n:] = 0.0 rfft_obj.execute() - rfft_of_T = rfft_obj.output_array.copy() + rfft_T = rfft_obj.output_array.copy() # Step 2 # Compute RFFT of Q (reversed, scaled, and zero-padded) @@ -302,11 +302,11 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): np.multiply(Q[::-1], 1.0 / next_fast_n, out=rfft_obj.input_array[:m]) rfft_obj.input_array[m:] = 0.0 rfft_obj.execute() - rfft_of_Q = rfft_obj.output_array + rfft_Q = rfft_obj.output_array # Step 3 # Convert back to time domain by taking the inverse RFFT - np.multiply(rfft_of_T, rfft_of_Q, out=irfft_obj.input_array) + np.multiply(rfft_T, rfft_Q, out=irfft_obj.input_array) irfft_obj.execute() return irfft_obj.output_array[m - 1 : n] # valid portion From 2ebbac179c793dbe31c4a34ce9ea79a1d1e79443 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Mon, 27 Apr 2026 22:18:47 -0400 Subject: [PATCH 11/27] empty commit From ec3744df6c2c99f94a8ae9de8a0c6f5b49c4632d Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 1 May 2026 23:55:03 -0400 Subject: [PATCH 12/27] revise condition for checking pyffftw --- test.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test.sh b/test.sh index fc80e76c3..91cee3753 100755 --- a/test.sh +++ b/test.sh @@ -161,12 +161,11 @@ gen_ray_coveragerc() check_fftw_pyfftw() { - if ! command -v fftw-wisdom &> /dev/null \ - || ! python -c "import pyfftw" &> /dev/null; + if ! python -c "import pyfftw" &> /dev/null; then - echo "FFTW and/or pyFFTW Not Installed" + echo "pyFFTW cannot be imported." else - echo "FFTW and pyFFTW Installed" + echo "pyFFTW can be imported!" fi } From 22e5a5538fc3ee4d8ba351adbfeb9f3ea30117e4 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sat, 2 May 2026 20:20:12 -0400 Subject: [PATCH 13/27] add param to change dtype and check multi-thresding --- stumpy/sdp.py | 25 ++++++++++++++++---- tests/test_sdp.py | 59 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 5 deletions(-) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index a809bada9..c37c02269 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -176,7 +176,7 @@ class _PYFFTW_SLIDING_DOT_PRODUCT: `pyfftw documentation `__ """ - def __init__(self, max_n=2**20): + def __init__(self, max_n=2**20, real_dtype="float64"): """ Initialize the `_PYFFTW_SLIDING_DOT_PRODUCT` object, which can be called to compute the sliding dot product using FFTW via pyfftw. @@ -188,13 +188,28 @@ def __init__(self, max_n=2**20): real-valued array. A complex-valued array of size `1 + (max_n // 2)` will also be preallocated. + real_dtype : str, default="float64" + The real data type to use for the preallocated arrays. Must be either + "float64" or "longdouble". The complex data type will be set to + "complex128" or "clongdouble" respectively. + Returns ------- None """ + REAL_TO_COMPLEX_MAP = { + "float64": "complex128", + "longdouble": "clongdouble", + } + if real_dtype not in ["float64", "longdouble"]: + raise ValueError( + f"Invalid real_dtype: {real_dtype}. Must be 'float64' or 'longdouble'." + ) + complex_dtype = REAL_TO_COMPLEX_MAP[real_dtype] + # Preallocate arrays - self.real_arr = pyfftw.empty_aligned(max_n, dtype="float64") - self.complex_arr = pyfftw.empty_aligned(1 + (max_n // 2), dtype="complex128") + self.real_arr = pyfftw.empty_aligned(max_n, dtype=real_dtype) + self.complex_arr = pyfftw.empty_aligned(1 + (max_n // 2), dtype=complex_dtype) # Store FFTW objects, keyed by (next_fast_n, n_threads, planning_flag) self.rfft_objects = {} @@ -244,9 +259,9 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): # Update preallocated arrays if needed if next_fast_n > len(self.real_arr): - self.real_arr = pyfftw.empty_aligned(next_fast_n, dtype="float64") + self.real_arr = pyfftw.empty_aligned(next_fast_n, dtype=self.real_arr.dtype) self.complex_arr = pyfftw.empty_aligned( - 1 + (next_fast_n // 2), dtype="complex128" + 1 + (next_fast_n // 2), dtype=self.complex_arr.dtype ) real_arr = self.real_arr[:next_fast_n] diff --git a/tests/test_sdp.py b/tests/test_sdp.py index 3ec26971f..8c9bc6484 100644 --- a/tests/test_sdp.py +++ b/tests/test_sdp.py @@ -177,3 +177,62 @@ def test_pyfftw_sdp_max_n(): np.testing.assert_allclose(comp, ref) return + + +def test_pyfftw_sdp_longdoube(): + if not sdp.PYFFTW_IS_AVAILABLE: # pragma: no cover + pytest.skip("Skipping Test pyFFTW Not Installed") + + # This test checks that the pyfftw_sdp can be initialized with longdouble data type + max_n = 2**10 + sdp_func = sdp._PYFFTW_SLIDING_DOT_PRODUCT(max_n, real_dtype="longdouble") + + T = np.random.rand(max_n) + Q = np.random.rand(2**8) + + comp = sdp_func(Q, T) + ref = naive.rolling_window_dot_product(Q, T) + + np.testing.assert_allclose(comp, ref) + + return + + +def test_pyfftw_sdp_multithreaded(): + if not sdp.PYFFTW_IS_AVAILABLE: # pragma: no cover + pytest.skip("Skipping Test pyFFTW Not Installed") + + # This test checks that the pyfftw_sdp can be initialized + # with multiple threads + max_n = 2**10 + sdp_func = sdp._PYFFTW_SLIDING_DOT_PRODUCT(max_n) + + T = np.random.rand(max_n) + Q = np.random.rand(2**8) + + comp = sdp_func(Q, T, n_threads=2) + ref = naive.rolling_window_dot_product(Q, T) + + np.testing.assert_allclose(comp, ref) + + return + + +def test_pyfftw_sdp_multithreaded_longdouble(): + if not sdp.PYFFTW_IS_AVAILABLE: # pragma: no cover + pytest.skip("Skipping Test pyFFTW Not Installed") + + # This test checks that the pyfftw_sdp can be initialized + # with multiple threads and longdouble data type + max_n = 2**10 + sdp_func = sdp._PYFFTW_SLIDING_DOT_PRODUCT(max_n, real_dtype="longdouble") + + T = np.random.rand(max_n) + Q = np.random.rand(2**8) + + comp = sdp_func(Q, T, n_threads=2) + ref = naive.rolling_window_dot_product(Q, T) + + np.testing.assert_allclose(comp, ref) + + return From 7e75ade18fcb41fb61482050d9bcbb68e4a1face Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sat, 2 May 2026 23:04:55 -0400 Subject: [PATCH 14/27] fixed coverage --- stumpy/sdp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index c37c02269..3e444fd4f 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -201,7 +201,7 @@ def __init__(self, max_n=2**20, real_dtype="float64"): "float64": "complex128", "longdouble": "clongdouble", } - if real_dtype not in ["float64", "longdouble"]: + if real_dtype not in ["float64", "longdouble"]: # pragma: no cover raise ValueError( f"Invalid real_dtype: {real_dtype}. Must be 'float64' or 'longdouble'." ) From 68d767cd559844973f6f45a053712cf49bfa35f3 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sun, 3 May 2026 13:35:50 -0400 Subject: [PATCH 15/27] addressed comments --- stumpy/sdp.py | 2 +- test.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index 3e444fd4f..64bf16f2d 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -191,7 +191,7 @@ def __init__(self, max_n=2**20, real_dtype="float64"): real_dtype : str, default="float64" The real data type to use for the preallocated arrays. Must be either "float64" or "longdouble". The complex data type will be set to - "complex128" or "clongdouble" respectively. + "complex128" or "clongdouble", respectively. Returns ------- diff --git a/test.sh b/test.sh index 91cee3753..681e7007e 100755 --- a/test.sh +++ b/test.sh @@ -163,9 +163,9 @@ check_fftw_pyfftw() { if ! python -c "import pyfftw" &> /dev/null; then - echo "pyFFTW cannot be imported." + echo "pyFFTW cannot be imported" else - echo "pyFFTW can be imported!" + echo "pyFFTW was successfully imported" fi } From d6fda460789150c0bb03963700c44719f19aeb7f Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Mon, 18 May 2026 20:31:27 -0400 Subject: [PATCH 16/27] remove instance of class from stumpy module --- stumpy/sdp.py | 4 ---- tests/test_sdp.py | 4 +++- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index 64bf16f2d..0815969f4 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -327,10 +327,6 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): return irfft_obj.output_array[m - 1 : n] # valid portion -if PYFFTW_IS_AVAILABLE: # pragma: no cover - _pyfftw_sliding_dot_product = _PYFFTW_SLIDING_DOT_PRODUCT(max_n=2**20) - - def _sliding_dot_product(Q, T): """ Compute the sliding dot product between `Q` and `T` diff --git a/tests/test_sdp.py b/tests/test_sdp.py index 8c9bc6484..981d17b6c 100644 --- a/tests/test_sdp.py +++ b/tests/test_sdp.py @@ -70,11 +70,13 @@ def get_sdp_function_names(): out = [] - for func_name, func in inspect.getmembers(sdp, inspect.isfunction): + for func_name, _ in inspect.getmembers(sdp, inspect.isfunction): if func_name.endswith("sliding_dot_product"): out.append(func_name) if sdp.PYFFTW_IS_AVAILABLE: # pragma: no cover + callable_obj = sdp._PYFFTW_SLIDING_DOT_PRODUCT(max_n=2**20) + setattr(sdp, "_pyfftw_sliding_dot_product", callable_obj) out.append("_pyfftw_sliding_dot_product") return out From 51a21ebc89dacb38de23893a50087325e89f6e9b Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Tue, 19 May 2026 20:34:36 -0400 Subject: [PATCH 17/27] addressed comments --- stumpy/sdp.py | 4 ++++ tests/test_sdp.py | 29 ++++++++++------------------- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index 0815969f4..64bf16f2d 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -327,6 +327,10 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): return irfft_obj.output_array[m - 1 : n] # valid portion +if PYFFTW_IS_AVAILABLE: # pragma: no cover + _pyfftw_sliding_dot_product = _PYFFTW_SLIDING_DOT_PRODUCT(max_n=2**20) + + def _sliding_dot_product(Q, T): """ Compute the sliding dot product between `Q` and `T` diff --git a/tests/test_sdp.py b/tests/test_sdp.py index 981d17b6c..99752378e 100644 --- a/tests/test_sdp.py +++ b/tests/test_sdp.py @@ -70,15 +70,10 @@ def get_sdp_function_names(): out = [] - for func_name, _ in inspect.getmembers(sdp, inspect.isfunction): + for func_name, _ in inspect.getmembers(sdp, callable): if func_name.endswith("sliding_dot_product"): out.append(func_name) - if sdp.PYFFTW_IS_AVAILABLE: # pragma: no cover - callable_obj = sdp._PYFFTW_SLIDING_DOT_PRODUCT(max_n=2**20) - setattr(sdp, "_pyfftw_sliding_dot_product", callable_obj) - out.append("_pyfftw_sliding_dot_product") - return out @@ -163,17 +158,16 @@ def test_pyfftw_sdp_max_n(): if not sdp.PYFFTW_IS_AVAILABLE: # pragma: no cover pytest.skip("Skipping Test pyFFTW Not Installed") - # When `len(T)` larger than `max_n` in pyfftw_sdp, + # When `len(T)` larger than `real_arr` in pyfftw_sdp, # the internal preallocated arrays should be resized. # This test checks that functionality. - max_n = 2**10 - sdp_func = sdp._PYFFTW_SLIDING_DOT_PRODUCT(max_n) - # len(T) > max_n to trigger array resizing - T = np.random.rand(max_n + 1) - Q = np.random.rand(2**8) + # len(T) > real_arr to trigger array resizing + real_arr_len = len(sdp._pyfftw_sliding_dot_product.real_arr) + T = np.random.rand(real_arr_len + 1) + Q = np.random.rand(2**2) - comp = sdp_func(Q, T) + comp = sdp._pyfftw_sliding_dot_product(Q, T) ref = naive.rolling_window_dot_product(Q, T) np.testing.assert_allclose(comp, ref) @@ -206,13 +200,10 @@ def test_pyfftw_sdp_multithreaded(): # This test checks that the pyfftw_sdp can be initialized # with multiple threads - max_n = 2**10 - sdp_func = sdp._PYFFTW_SLIDING_DOT_PRODUCT(max_n) + T = np.random.rand(2**5) + Q = np.random.rand(2**4) - T = np.random.rand(max_n) - Q = np.random.rand(2**8) - - comp = sdp_func(Q, T, n_threads=2) + comp = sdp._pyfftw_sliding_dot_product(Q, T, n_threads=2) ref = naive.rolling_window_dot_product(Q, T) np.testing.assert_allclose(comp, ref) From de9cade1b0a4cf12d6ee9b820231a7405e6e3051 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Wed, 20 May 2026 21:01:21 -0400 Subject: [PATCH 18/27] pass default value when creating an instance --- stumpy/sdp.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index 64bf16f2d..c8c2a3439 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -328,7 +328,9 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): if PYFFTW_IS_AVAILABLE: # pragma: no cover - _pyfftw_sliding_dot_product = _PYFFTW_SLIDING_DOT_PRODUCT(max_n=2**20) + _pyfftw_sliding_dot_product = _PYFFTW_SLIDING_DOT_PRODUCT( + max_n=2**20, real_dtype="float64" + ) def _sliding_dot_product(Q, T): From e5bf50e50da38e39a7258e374c9ba30d49b8f223 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sat, 23 May 2026 22:33:45 -0400 Subject: [PATCH 19/27] replace class with closure (help from AI) --- stumpy/sdp.py | 157 ++++++++++++++++++---------------------------- tests/test_sdp.py | 22 ++++--- 2 files changed, 74 insertions(+), 105 deletions(-) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index c8c2a3439..6b0a663dd 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -116,11 +116,11 @@ def _pocketfft_sliding_dot_product(Q, T): return c2r(False, np.multiply(fft_2d[0], fft_2d[1]), n=next_fast_n)[m - 1 : n] -class _PYFFTW_SLIDING_DOT_PRODUCT: +def _make_pyfftw_sliding_dot_product(max_n=2**20, real_dtype="float64"): """ - A class to compute the sliding dot product using FFTW via pyfftw. + A closure to compute the sliding dot product using FFTW via pyfftw. - This class uses FFTW (via pyfftw) to efficiently compute the sliding dot product + This closure uses FFTW (via pyfftw) to efficiently compute the sliding dot product between a query sequence, Q, and a time series, T. It preallocates arrays and caches FFTW objects to optimize repeated computations with similar-sized inputs. @@ -128,96 +128,55 @@ class _PYFFTW_SLIDING_DOT_PRODUCT: ---------- max_n : int, default=2**20 Maximum length to preallocate arrays for. This will be the size of the - real-valued array. A complex-valued array of size `1 + (max_n // 2)` + real-valued array. A complex-valued array of size 1 + (max_n // 2) will also be preallocated. If inputs exceed this size, arrays will be reallocated to accommodate larger sizes. - Attributes - ---------- - real_arr : pyfftw.empty_aligned - Preallocated real-valued array for FFTW computations. - - complex_arr : pyfftw.empty_aligned - Preallocated complex-valued array for FFTW computations. - - rfft_objects : dict - Cache of FFTW forward transform objects with - (next_fast_n, n_threads, planning_flag) as lookup keys. + real_dtype : str, default="float64" + The real data type to use for the preallocated arrays. Must be either + "float64" or "longdouble". The complex data type will be set to + "complex128" or "clongdouble", respectively. - irfft_objects : dict - Cache of FFTW inverse transform objects with - (next_fast_n, n_threads, planning_flag) as lookup keys. - - Methods + Returns ------- - __call__(Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE") - Compute the sliding dot product between `Q` and `T` using FFTW - via pyfftw, and cache FFTW objects if not already cached. + sliding_dot_product : callable + A callable that computes the sliding dot product between Q and T using FFTW + via pyfftw, and caches FFTW objects if not already cached. Notes ----- - The class maintains internal caches of FFTW objects to avoid redundant planning + The closure maintains internal caches of FFTW objects to avoid redundant planning operations when called multiple times with similar-sized inputs and parameters. - When `planning_flag=="FFTW_ESTIMATE"`, there will be no planning operation. + When planning_flag == "FFTW_ESTIMATE", there will be no planning operation. However, caching FFTW objects is still beneficial as the overhead of creating those objects can be avoided in subsequent calls. - Examples - -------- - >>> sdp_obj = _PYFFTW_SLIDING_DOT_PRODUCT(max_n=1000) - >>> Q = np.array([1, 2, 3]) - >>> T = np.array([4, 5, 6, 7, 8]) - >>> result = sdp_obj(Q, T) - References ---------- - `FFTW documentation `__ - - `pyfftw documentation `__ + FFTW documentation: http://www.fftw.org/ + pyfftw documentation: https://pyfftw.readthedocs.io/ """ - - def __init__(self, max_n=2**20, real_dtype="float64"): + REAL_TO_COMPLEX_MAP = { + "float64": "complex128", + "longdouble": "clongdouble", + } + if real_dtype not in ["float64", "longdouble"]: # pragma: no cover + raise ValueError( + f"Invalid real_dtype: {real_dtype}. Must be 'float64' or 'longdouble'." + ) + complex_dtype = REAL_TO_COMPLEX_MAP[real_dtype] + + # Preallocate arrays + real_arr = pyfftw.empty_aligned(max_n, dtype=real_dtype) + complex_arr = pyfftw.empty_aligned(1 + (max_n // 2), dtype=complex_dtype) + + # Store FFTW objects, keyed by (next_fast_n, n_threads, planning_flag) + rfft_objects = {} + irfft_objects = {} + + def sliding_dot_product(Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): """ - Initialize the `_PYFFTW_SLIDING_DOT_PRODUCT` object, which can be called - to compute the sliding dot product using FFTW via pyfftw. - - Parameters - ---------- - max_n : int, default=2**20 - Maximum length to preallocate arrays for. This will be the size of the - real-valued array. A complex-valued array of size `1 + (max_n // 2)` - will also be preallocated. - - real_dtype : str, default="float64" - The real data type to use for the preallocated arrays. Must be either - "float64" or "longdouble". The complex data type will be set to - "complex128" or "clongdouble", respectively. - - Returns - ------- - None - """ - REAL_TO_COMPLEX_MAP = { - "float64": "complex128", - "longdouble": "clongdouble", - } - if real_dtype not in ["float64", "longdouble"]: # pragma: no cover - raise ValueError( - f"Invalid real_dtype: {real_dtype}. Must be 'float64' or 'longdouble'." - ) - complex_dtype = REAL_TO_COMPLEX_MAP[real_dtype] - - # Preallocate arrays - self.real_arr = pyfftw.empty_aligned(max_n, dtype=real_dtype) - self.complex_arr = pyfftw.empty_aligned(1 + (max_n // 2), dtype=complex_dtype) - - # Store FFTW objects, keyed by (next_fast_n, n_threads, planning_flag) - self.rfft_objects = {} - self.irfft_objects = {} - - def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): - """ - Compute the sliding dot product between `Q` and `T` using FFTW via pyfftw, + Compute the sliding dot product between Q and T using FFTW via pyfftw, and cache FFTW objects if not already cached. Parameters @@ -242,7 +201,7 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): Returns ------- out : numpy.ndarray - Sliding dot product between `Q` and `T`. + Sliding dot product between Q and T. Notes ----- @@ -253,52 +212,54 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): This implementation is inspired by the answer on StackOverflow: https://stackoverflow.com/a/30615425/2955541 """ + nonlocal real_arr, complex_arr + m = Q.shape[0] n = T.shape[0] next_fast_n = pyfftw.next_fast_len(n) # Update preallocated arrays if needed - if next_fast_n > len(self.real_arr): - self.real_arr = pyfftw.empty_aligned(next_fast_n, dtype=self.real_arr.dtype) - self.complex_arr = pyfftw.empty_aligned( - 1 + (next_fast_n // 2), dtype=self.complex_arr.dtype + if next_fast_n > len(real_arr): + real_arr = pyfftw.empty_aligned(next_fast_n, dtype=real_arr.dtype) + complex_arr = pyfftw.empty_aligned( + 1 + (next_fast_n // 2), dtype=complex_arr.dtype ) - real_arr = self.real_arr[:next_fast_n] - complex_arr = self.complex_arr[: 1 + (next_fast_n // 2)] + real_view = real_arr[:next_fast_n] + complex_view = complex_arr[: 1 + (next_fast_n // 2)] # Get or create FFTW objects key = (next_fast_n, n_threads, planning_flag) - rfft_obj = self.rfft_objects.get(key, None) - irfft_obj = self.irfft_objects.get(key, None) + rfft_obj = rfft_objects.get(key, None) + irfft_obj = irfft_objects.get(key, None) if rfft_obj is None or irfft_obj is None: rfft_obj = pyfftw.FFTW( - input_array=real_arr, - output_array=complex_arr, + input_array=real_view, + output_array=complex_view, direction="FFTW_FORWARD", flags=(planning_flag,), threads=n_threads, ) irfft_obj = pyfftw.FFTW( - input_array=complex_arr, - output_array=real_arr, + input_array=complex_view, + output_array=real_view, direction="FFTW_BACKWARD", flags=(planning_flag, "FFTW_DESTROY_INPUT"), threads=n_threads, ) - self.rfft_objects[key] = rfft_obj - self.irfft_objects[key] = irfft_obj + rfft_objects[key] = rfft_obj + irfft_objects[key] = irfft_obj else: # Update the input and output arrays of the cached FFTW objects # in case their original input and output arrays were reallocated # in a previous call - rfft_obj.update_arrays(real_arr, complex_arr) - irfft_obj.update_arrays(complex_arr, real_arr) + rfft_obj.update_arrays(real_view, complex_view) + irfft_obj.update_arrays(complex_view, real_view) # Compute the (circular) convolution between T and Q[::-1], - # each zero-padded to length `next_fast_n` by performing + # each zero-padded to length next_fast_n by performing # the following three steps: # Step 1 @@ -312,7 +273,7 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): # Step 2 # Compute RFFT of Q (reversed, scaled, and zero-padded) - # Scaling is required because the thin wrapper `execute` + # Scaling is required because the thin wrapper execute # that will be called below does not perform normalization np.multiply(Q[::-1], 1.0 / next_fast_n, out=rfft_obj.input_array[:m]) rfft_obj.input_array[m:] = 0.0 @@ -326,9 +287,11 @@ def __call__(self, Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): return irfft_obj.output_array[m - 1 : n] # valid portion + return sliding_dot_product + if PYFFTW_IS_AVAILABLE: # pragma: no cover - _pyfftw_sliding_dot_product = _PYFFTW_SLIDING_DOT_PRODUCT( + _pyfftw_sliding_dot_product = _make_pyfftw_sliding_dot_product( max_n=2**20, real_dtype="float64" ) diff --git a/tests/test_sdp.py b/tests/test_sdp.py index 99752378e..0c1021a3b 100644 --- a/tests/test_sdp.py +++ b/tests/test_sdp.py @@ -70,8 +70,10 @@ def get_sdp_function_names(): out = [] - for func_name, _ in inspect.getmembers(sdp, callable): - if func_name.endswith("sliding_dot_product"): + for func_name, func in inspect.getmembers(sdp, callable): + if func_name.endswith("sliding_dot_product") and inspect.signature( + func + ).parameters.keys() >= {"Q", "T"}: out.append(func_name) return out @@ -162,12 +164,16 @@ def test_pyfftw_sdp_max_n(): # the internal preallocated arrays should be resized. # This test checks that functionality. - # len(T) > real_arr to trigger array resizing - real_arr_len = len(sdp._pyfftw_sliding_dot_product.real_arr) - T = np.random.rand(real_arr_len + 1) + max_n = 2**10 + _pyfftw_sliding_dot_product = sdp._make_pyfftw_sliding_dot_product( + max_n=max_n, real_dtype="float64" + ) + + # len(T) > max_n to trigger array resizing + T = np.random.rand(max_n + 1) Q = np.random.rand(2**2) - comp = sdp._pyfftw_sliding_dot_product(Q, T) + comp = _pyfftw_sliding_dot_product(Q, T) ref = naive.rolling_window_dot_product(Q, T) np.testing.assert_allclose(comp, ref) @@ -181,7 +187,7 @@ def test_pyfftw_sdp_longdoube(): # This test checks that the pyfftw_sdp can be initialized with longdouble data type max_n = 2**10 - sdp_func = sdp._PYFFTW_SLIDING_DOT_PRODUCT(max_n, real_dtype="longdouble") + sdp_func = sdp._make_pyfftw_sliding_dot_product(max_n, real_dtype="longdouble") T = np.random.rand(max_n) Q = np.random.rand(2**8) @@ -218,7 +224,7 @@ def test_pyfftw_sdp_multithreaded_longdouble(): # This test checks that the pyfftw_sdp can be initialized # with multiple threads and longdouble data type max_n = 2**10 - sdp_func = sdp._PYFFTW_SLIDING_DOT_PRODUCT(max_n, real_dtype="longdouble") + sdp_func = sdp._make_pyfftw_sliding_dot_product(max_n, real_dtype="longdouble") T = np.random.rand(max_n) Q = np.random.rand(2**8) From a050a95f60325ea79cd07f78ee048391d3c05fb1 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sun, 24 May 2026 09:07:40 -0400 Subject: [PATCH 20/27] updated logic for excluding pyfftw from coverage when unavailable --- test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test.sh b/test.sh index 681e7007e..7d1b30c1e 100755 --- a/test.sh +++ b/test.sh @@ -172,8 +172,8 @@ check_fftw_pyfftw() gen_pyfftw_coveragerc() { gen_coveragerc_boilerplate - echo " class .*PYFFTW*" >> .coveragerc_override - echo " def test_.*pyfftw*" >> .coveragerc_override + echo " def .*pyfftw.*" >> .coveragerc_override + echo " def test_.*pyfftw.*" >> .coveragerc_override } set_coveragerc() From 00878c37b1afc947b7693c36f6908ff0b2e1fc2a Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 26 Jun 2026 19:36:50 -0400 Subject: [PATCH 21/27] fixed black formatting --- stumpy/sdp.py | 1 - 1 file changed, 1 deletion(-) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index 3dd6551ff..2047c8f44 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -5,7 +5,6 @@ from . import config - # _duccfft replaced _pocketfft in scipy 1.18 try: from scipy.fft._duccfft.basic import c2r, r2c From b8ee3210c4d6ac72b5f34088c3e76a529a7d24f6 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 26 Jun 2026 23:16:34 -0400 Subject: [PATCH 22/27] add function to allow user to reset max_n --- stumpy/sdp.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index 2047c8f44..05eefd29b 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -146,7 +146,10 @@ def _make_pyfftw_sliding_dot_product(max_n=2**20, real_dtype="float64"): ------- sliding_dot_product : callable A callable that computes the sliding dot product between Q and T using FFTW - via pyfftw, and caches FFTW objects if not already cached. + via pyfftw, and caches FFTW objects if not already cached. The callable + automatically resizes the preallocated arrays if the inputs exceed + the current maximum size. In addition, the callable has a method + `reset_arr(max_n)` to reset the preallocated arrays to a new maximum length. Notes ----- @@ -292,6 +295,25 @@ def sliding_dot_product(Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): return irfft_obj.output_array[m - 1 : n] # valid portion + def reset_arr(max_n): # pragma: no cover + """ + Reset the preallocated arrays to a new maximum length. + + Parameters + ---------- + max_n : int + New maximum length for the preallocated arrays. + + Returns + ------- + None + """ + nonlocal real_arr, complex_arr + complex_dtype = REAL_TO_COMPLEX_MAP[real_dtype] + real_arr = pyfftw.empty_aligned(max_n, dtype=real_dtype) + complex_arr = pyfftw.empty_aligned(1 + (max_n // 2), dtype=complex_dtype) + + sliding_dot_product.reset_arr = reset_arr return sliding_dot_product From 28e429f9644f2d4f1783551191e2056df6a95e1a Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Fri, 26 Jun 2026 23:52:00 -0400 Subject: [PATCH 23/27] add comment at top to describe module --- stumpy/sdp.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index 05eefd29b..c5edde843 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -1,3 +1,8 @@ +# This file contains different implementations of +# the sliding dot product (sdp). The name of any +# callable object that computes the sliding dot product +# should end with 'sliding_dot_product'. + import numpy as np from numba import njit from scipy.fft import next_fast_len From 710406d51046a881fb671dfbb91c3af0234e6630 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sat, 27 Jun 2026 00:15:09 -0400 Subject: [PATCH 24/27] fixed docstring and comments --- stumpy/sdp.py | 4 ++-- tests/test_sdp.py | 22 +++++++++++++--------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index c5edde843..56f534398 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -136,13 +136,13 @@ def _make_pyfftw_sliding_dot_product(max_n=2**20, real_dtype="float64"): Parameters ---------- - max_n : int, default=2**20 + max_n : int, default 2**20 Maximum length to preallocate arrays for. This will be the size of the real-valued array. A complex-valued array of size 1 + (max_n // 2) will also be preallocated. If inputs exceed this size, arrays will be reallocated to accommodate larger sizes. - real_dtype : str, default="float64" + real_dtype : str, default "float64" The real data type to use for the preallocated arrays. Must be either "float64" or "longdouble". The complex data type will be set to "complex128" or "clongdouble", respectively. diff --git a/tests/test_sdp.py b/tests/test_sdp.py index 0c1021a3b..13b970add 100644 --- a/tests/test_sdp.py +++ b/tests/test_sdp.py @@ -160,16 +160,16 @@ def test_pyfftw_sdp_max_n(): if not sdp.PYFFTW_IS_AVAILABLE: # pragma: no cover pytest.skip("Skipping Test pyFFTW Not Installed") - # When `len(T)` larger than `real_arr` in pyfftw_sdp, - # the internal preallocated arrays should be resized. - # This test checks that functionality. + # When `len(T)` larger than original `max_n`, + # the callable object returned by `_make_pyfftw_sliding_dot_product` + # should still work correctly. This test checks that functionality. max_n = 2**10 _pyfftw_sliding_dot_product = sdp._make_pyfftw_sliding_dot_product( max_n=max_n, real_dtype="float64" ) - # len(T) > max_n to trigger array resizing + # len(T) > max_n to trigger internal array resizing T = np.random.rand(max_n + 1) Q = np.random.rand(2**2) @@ -185,7 +185,9 @@ def test_pyfftw_sdp_longdoube(): if not sdp.PYFFTW_IS_AVAILABLE: # pragma: no cover pytest.skip("Skipping Test pyFFTW Not Installed") - # This test checks that the pyfftw_sdp can be initialized with longdouble data type + # This test checks that the callable object + # returned by `_make_pyfftw_sliding_dot_product` + # can support `real_dtype="longdouble"` max_n = 2**10 sdp_func = sdp._make_pyfftw_sliding_dot_product(max_n, real_dtype="longdouble") @@ -204,8 +206,9 @@ def test_pyfftw_sdp_multithreaded(): if not sdp.PYFFTW_IS_AVAILABLE: # pragma: no cover pytest.skip("Skipping Test pyFFTW Not Installed") - # This test checks that the pyfftw_sdp can be initialized - # with multiple threads + # This test checks that the callable object + # returned by `_make_pyfftw_sliding_dot_product` + # can support multithreading. T = np.random.rand(2**5) Q = np.random.rand(2**4) @@ -221,8 +224,9 @@ def test_pyfftw_sdp_multithreaded_longdouble(): if not sdp.PYFFTW_IS_AVAILABLE: # pragma: no cover pytest.skip("Skipping Test pyFFTW Not Installed") - # This test checks that the pyfftw_sdp can be initialized - # with multiple threads and longdouble data type + # This test checks that the callable object + # returned by `_make_pyfftw_sliding_dot_product` + # can support multithreading and `real_dtype="longdouble"` max_n = 2**10 sdp_func = sdp._make_pyfftw_sliding_dot_product(max_n, real_dtype="longdouble") From 94e924cd5f7052e851ed09ca74a1ab436d4f9516 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Thu, 2 Jul 2026 01:04:01 -0400 Subject: [PATCH 25/27] addressed comment --- stumpy/sdp.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index 56f534398..f088072d3 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -300,7 +300,7 @@ def sliding_dot_product(Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): return irfft_obj.output_array[m - 1 : n] # valid portion - def reset_arr(max_n): # pragma: no cover + def set_max_n(max_n): # pragma: no cover """ Reset the preallocated arrays to a new maximum length. @@ -314,11 +314,10 @@ def reset_arr(max_n): # pragma: no cover None """ nonlocal real_arr, complex_arr - complex_dtype = REAL_TO_COMPLEX_MAP[real_dtype] - real_arr = pyfftw.empty_aligned(max_n, dtype=real_dtype) - complex_arr = pyfftw.empty_aligned(1 + (max_n // 2), dtype=complex_dtype) + real_arr = pyfftw.empty_aligned(max_n, dtype=real_arr.dtype) + complex_arr = pyfftw.empty_aligned(1 + (max_n // 2), dtype=complex_arr.dtype) - sliding_dot_product.reset_arr = reset_arr + sliding_dot_product.set_max_n = set_max_n return sliding_dot_product From 23713fc7b04e69c490308a7f03d27f24f629cca4 Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sat, 4 Jul 2026 22:41:56 -0400 Subject: [PATCH 26/27] minor changes in docstring --- stumpy/sdp.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index f088072d3..ab40ef784 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -130,8 +130,8 @@ def _make_pyfftw_sliding_dot_product(max_n=2**20, real_dtype="float64"): """ A closure to compute the sliding dot product using FFTW via pyfftw. - This closure uses FFTW (via pyfftw) to efficiently compute the sliding dot product - between a query sequence, Q, and a time series, T. It preallocates arrays and caches + This closure returns a callable that computes the sliding dot product between + a query array, ``Q``, and a time series, ``T``. It preallocates arrays and caches FFTW objects to optimize repeated computations with similar-sized inputs. Parameters @@ -154,7 +154,7 @@ def _make_pyfftw_sliding_dot_product(max_n=2**20, real_dtype="float64"): via pyfftw, and caches FFTW objects if not already cached. The callable automatically resizes the preallocated arrays if the inputs exceed the current maximum size. In addition, the callable has a method - `reset_arr(max_n)` to reset the preallocated arrays to a new maximum length. + `set_max_n` to set the preallocated arrays to a new maximum length. Notes ----- @@ -189,7 +189,7 @@ def _make_pyfftw_sliding_dot_product(max_n=2**20, real_dtype="float64"): def sliding_dot_product(Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): """ - Compute the sliding dot product between Q and T using FFTW via pyfftw, + Compute the sliding dot product between ``Q`` and `T`` using FFTW via pyfftw, and cache FFTW objects if not already cached. Parameters @@ -214,7 +214,7 @@ def sliding_dot_product(Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): Returns ------- out : numpy.ndarray - Sliding dot product between Q and T. + Sliding dot product between ``Q`` and ``T``. Notes ----- @@ -329,7 +329,7 @@ def set_max_n(max_n): # pragma: no cover def _sliding_dot_product(Q, T): """ - Compute the sliding dot product between `Q` and `T` + Compute the sliding dot product between ``Q`` and ``T`` Parameters ---------- From 161086d138b2587ec992872f200455c9374e634e Mon Sep 17 00:00:00 2001 From: NimaSarajpoor Date: Sat, 4 Jul 2026 23:28:35 -0400 Subject: [PATCH 27/27] minor changes --- stumpy/sdp.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/stumpy/sdp.py b/stumpy/sdp.py index ab40ef784..15c73427e 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -128,7 +128,7 @@ def _pocketfft_sliding_dot_product(Q, T): def _make_pyfftw_sliding_dot_product(max_n=2**20, real_dtype="float64"): """ - A closure to compute the sliding dot product using FFTW via pyfftw. + A closure to compute the sliding dot product using FFTW via pyfftw This closure returns a callable that computes the sliding dot product between a query array, ``Q``, and a time series, ``T``. It preallocates arrays and caches @@ -189,8 +189,8 @@ def _make_pyfftw_sliding_dot_product(max_n=2**20, real_dtype="float64"): def sliding_dot_product(Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): """ - Compute the sliding dot product between ``Q`` and `T`` using FFTW via pyfftw, - and cache FFTW objects if not already cached. + Compute the sliding dot product between ``Q`` and ``T`` using FFTW via pyfftw, + and cache FFTW objects if not already cached Parameters ---------- @@ -200,10 +200,10 @@ def sliding_dot_product(Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): T : numpy.ndarray Time series or sequence. - n_threads : int, default=1 + n_threads : int, default 1 Number of threads to use for FFTW computations. - planning_flag : str, default="FFTW_ESTIMATE" + planning_flag : str, default "FFTW_ESTIMATE" The planning flag that will be used in FFTW for planning. See pyfftw documentation for details. Current options, ordered ascendingly by the level of aggressiveness in planning, are: @@ -302,7 +302,7 @@ def sliding_dot_product(Q, T, n_threads=1, planning_flag="FFTW_ESTIMATE"): def set_max_n(max_n): # pragma: no cover """ - Reset the preallocated arrays to a new maximum length. + Set the preallocated arrays to a new maximum length Parameters ---------- @@ -342,6 +342,6 @@ def _sliding_dot_product(Q, T): Returns ------- out : numpy.ndarray - Sliding dot product between `Q` and `T`. + Sliding dot product between ``Q`` and ``T`` """ return _convolve_sliding_dot_product(Q, T)