diff --git a/stumpy/gpu_stump.py b/stumpy/gpu_stump.py index 8eb54a88c..d8e926730 100644 --- a/stumpy/gpu_stump.py +++ b/stumpy/gpu_stump.py @@ -14,22 +14,22 @@ @cuda.jit( - "(i8, f8[:], f8[:], i8, f8[:], f8[:], f8[:], f8[:], f8[:]," - "f8[:], f8[:], b1[:], b1[:], i8, b1, i8, f8[:, :], f8[:], " - "f8[:], i8[:, :], i8[:], i8[:], b1, i8[:], i8, i8)" + "(i8, f8[:], f8[:], i8, f8[:], f8[:], f8[:], f8[:], f8[:], f8[:], f8[:], f8[:], f8[:], b1[:], b1[:], i8, b1, i8, f8[:, :], f8[:], f8[:], i8[:, :], i8[:], i8[:], b1, i8[:], i8, i8)" # noqa: E501 ) def _compute_and_update_PI_kernel( idx, T_A, T_B, m, - QT_even, - QT_odd, - QT_first, + cov_even, + cov_odd, + cov_first, + μ_Q_m_1, + M_T_m_1, μ_Q, - σ_Q, + σ_Q_inverse, M_T, - Σ_T, + Σ_T_inverse, Q_subseq_isconstant, T_subseq_isconstant, w, @@ -41,7 +41,7 @@ def _compute_and_update_PI_kernel( indices, indices_L, indices_R, - compute_QT, + compute_cov, bfs, nlevel, k, @@ -64,28 +64,31 @@ def _compute_and_update_PI_kernel( m : int Window size - QT_even : numpy.ndarray - The input QT array (dot product between the query sequence,`Q`, and - time series, `T`) to use when `i` is even + cov_even : numpy.ndarray + The input covariance array to use when `i` is even - QT_odd : numpy.ndarray - The input QT array (dot product between the query sequence,`Q`, and - time series, `T`) to use when `i` is odd + cov_odd : numpy.ndarray + The input covariance array to use when `i` is odd - QT_first : numpy.ndarray - Dot product between the first query sequence,`Q`, and time series, `T` + cov_first : numpy.ndarray + Covariance between the first query sequence, `Q`, and time series, `T` - μ_Q : numpy.ndarray - Mean of the query sequence, `Q` + μ_Q_m_1 : numpy.ndarray + The mean of the time series, Q, for each window size, m-1 - σ_Q : numpy.ndarray - Standard deviation of the query sequence, `Q` + M_T_m_1 : numpy.ndarray + The mean of the time series, T, for each window size, m-1 - M_T : numpy.ndarray - Sliding mean of time series, `T` - - Σ_T : numpy.ndarray - Sliding standard deviation of time series, `T` + μ_Q : ndarray + The mean of the time series, Q, for each window size, m + σ_Q_inverse : ndarray + The inverse standard deviation of the time series, Q, for each window + size, m + M_T : ndarray + The mean of the time series, T, for each window size, m + Σ_T_inverse : ndarray + The inverse standard deviation of the time series, T, for each window + size, m Q_subseq_isconstant : numpy.ndarray A boolean array that indicates whether the subsequence in `Q` is constant (True) @@ -122,8 +125,8 @@ def _compute_and_update_PI_kernel( indices_R : numpy.ndarray The (top-1) right matrix profile indices - compute_QT : bool - A boolean flag for whether or not to compute QT + compute_cov : bool + A boolean flag for whether or not to compute covariance bfs : numpy.ndarray The breadth-first-search indices where the missing leaves of its corresponding @@ -154,36 +157,59 @@ def _compute_and_update_PI_kernel( j = idx # The name `i` is reserved to be used as an index for `T_A` + m_inverse = 1.0 / m + constant = (m - 1) * m_inverse * m_inverse + two_m = 2.0 * m + + M_T_m_1_j = M_T_m_1[j] + cov_a_j = T_B[j + m - 1] - M_T_m_1_j + if j == 0: + cov_c_j = T_B[m - 1] - M_T_m_1_j + else: + cov_c_j = T_B[j - 1] - M_T_m_1_j + + adj_cov_a_j = cov_a_j * constant + adj_cov_c_j = cov_c_j * constant + M_T_j_isinf = math.isinf(M_T[j]) + Σ_T_inverse_j = Σ_T_inverse[j] + T_subseq_isconstant_j = T_subseq_isconstant[j] if j % 2 == 0: - QT_out = QT_even - QT_in = QT_odd + cov_out = cov_even + cov_in = cov_odd else: - QT_out = QT_odd - QT_in = QT_even + cov_out = cov_odd + cov_in = cov_even - for i in range(start, QT_out.shape[0], stride): + for i in range(start, w, stride): zone_start = max(0, i - excl_zone) zone_stop = min(w, i + excl_zone) - if compute_QT: - QT_out[i] = ( - QT_in[i - 1] - T_A[i - 1] * T_B[j - 1] + T_A[i + m - 1] * T_B[j + m - 1] - ) - - QT_out[0] = QT_first[j] - if math.isinf(μ_Q[i]) or math.isinf(M_T[j]): + if compute_cov: + μ_Q_m_1_i = μ_Q_m_1[i] + cov_b_i = T_A[i + m - 1] - μ_Q_m_1_i + if i == 0: + cov_d_i = T_A[m - 1] - μ_Q_m_1_i + else: + cov_d_i = T_A[i - 1] - μ_Q_m_1_i + + if i == 0: + cov_out[0] = cov_first[j] + else: + cov_out[i] = cov_in[i - 1] + ( + adj_cov_a_j * cov_b_i - adj_cov_c_j * cov_d_i + ) + + if M_T_j_isinf or math.isinf(μ_Q[i]): p_norm = np.inf - elif Q_subseq_isconstant[i] and T_subseq_isconstant[j]: - p_norm = 0 - elif Q_subseq_isconstant[i] or T_subseq_isconstant[j]: + elif T_subseq_isconstant_j and Q_subseq_isconstant[i]: + p_norm = 0.0 + elif T_subseq_isconstant_j or Q_subseq_isconstant[i]: p_norm = m else: - denom = (σ_Q[i] * Σ_T[j]) * m - denom = max(denom, config.STUMPY_DENOM_THRESHOLD) - ρ = (QT_out[i] - (μ_Q[i] * M_T[j]) * m) / denom - ρ = min(ρ, 1.0) - p_norm = 2 * m * (1.0 - ρ) + pearson = cov_out[i] * (σ_Q_inverse[i] * Σ_T_inverse_j) + pearson = min(pearson, 1.0) + p_norm = two_m * (1.0 - pearson) if ignore_trivial: if j <= zone_stop and j >= zone_start: @@ -212,11 +238,13 @@ def _gpu_stump( range_stop, excl_zone, μ_Q_fname, - σ_Q_fname, - QT_fname, - QT_first_fname, + σ_Q_inverse_fname, + cov_fname, + cov_first_fname, + μ_Q_m_1_fname, + M_T_m_1_fname, M_T_fname, - Σ_T_fname, + Σ_T_inverse_fname, Q_subseq_isconstant_fname, T_subseq_isconstant_fname, w, @@ -253,26 +281,30 @@ def _gpu_stump( sliding window μ_Q_fname : str - The file name for the mean of the query sequence, `Q`, relative to - the current sliding window + The file name for the mean of the time series, Q, for each window size, m - σ_Q_fname : str - The file name for the standard deviation of the query sequence, `Q`, - relative to the current sliding window + σ_Q_inverse_fname : str + The file name for the inverse standard deviation of the time series, Q, + for each window size, m - QT_fname : str - The file name for the dot product between some query sequence,`Q`, - and time series, `T` + cov_fname : str + The file name for the sliding covariance - QT_first_fname : str - The file name for the QT for the first window relative to the current - sliding window + cov_first_fname : str + The file name for the covariance of the first window + + μ_Q_m_1_fname : str + The file name for the sliding mean of Q with window m-1 + + M_T_m_1_fname : str + The file name for the sliding mean of T with window m-1 M_T_fname : str - The file name for the sliding mean of time series, `T` + The file name for the mean of the time series, T, for each window size, m - Σ_T_fname : str - The file name for the sliding standard deviation of time series, `T` + Σ_T_inverse_fname : str + The file name for the inverse standard deviation of the time series, T, + for each window size, m Q_subseq_isconstant_fname : str The file name for the rolling isconstant in `Q` @@ -304,11 +336,20 @@ def _gpu_stump( profile_fname : str The file name for the matrix profile + profile_L_fname : str + The file name for the left matrix profile + + profile_R_fname : str + The file name for the right matrix profile + indices_fname : str - The file name for the matrix profile indices. The first column of the - array consists of the matrix profile indices, the second column consists - of the left matrix profile indices, and the third column consists of the - right matrix profile indices. + The file name for the matrix profile indices + + indices_L_fname : str + The file name for the left matrix profile indices + + indices_R_fname : str + The file name for the right matrix profile indices Notes ----- @@ -321,8 +362,8 @@ def _gpu_stump( (or index) of all its subsequences in another times series, T_B. Return: For every subsequence, Q, in T_A, you will get a distance - and index for the closest subsequence in T_A. Thus, the array - returned will have length T_A.shape[0]-m+1. Additionally, the + and index for the closest subsequence in T_B. Thus, the array + returned will have length T_A.shape[0] - m + 1. Additionally, the left and right matrix profiles are also returned. Note: Unlike in the Table II where T_A.shape is expected to be equal @@ -333,7 +374,7 @@ def _gpu_stump( Additionally, unlike STAMP where the exclusion zone is m/2, the default exclusion zone for STOMP is m/4 (See Definition 3 and Figure 3). - For self-joins, set `ignore_trivial = True` in order to avoid the + For self-joins, set ignore_trivial = True in order to avoid the trivial match. Note that left and right matrix profiles are only available for self-joins. @@ -343,36 +384,40 @@ def _gpu_stump( T_A = np.load(T_A_fname, allow_pickle=False) T_B = np.load(T_B_fname, allow_pickle=False) - QT = np.load(QT_fname, allow_pickle=False) - QT_first = np.load(QT_first_fname, allow_pickle=False) + cov = np.load(cov_fname, allow_pickle=False) + cov_first = np.load(cov_first_fname, allow_pickle=False) + μ_Q_m_1 = np.load(μ_Q_m_1_fname, allow_pickle=False) + M_T_m_1 = np.load(M_T_m_1_fname, allow_pickle=False) μ_Q = np.load(μ_Q_fname, allow_pickle=False) - σ_Q = np.load(σ_Q_fname, allow_pickle=False) + σ_Q_inverse = np.load(σ_Q_inverse_fname, allow_pickle=False) M_T = np.load(M_T_fname, allow_pickle=False) - Σ_T = np.load(Σ_T_fname, allow_pickle=False) + Σ_T_inverse = np.load(Σ_T_inverse_fname, allow_pickle=False) Q_subseq_isconstant = np.load(Q_subseq_isconstant_fname, allow_pickle=False) T_subseq_isconstant = np.load(T_subseq_isconstant_fname, allow_pickle=False) - nlevel = np.floor(np.log2(k) + 1).astype(np.int64) # number of levels in binary search tree from which `bfs` is constructed. + nlevel = np.floor(np.log2(k) + 1).astype(np.int64) with cuda.gpus[device_id]: device_T_A = cuda.to_device(T_A) - device_QT_odd = cuda.to_device(QT) - device_QT_even = cuda.to_device(QT) - device_QT_first = cuda.to_device(QT_first) + device_cov_odd = cuda.to_device(cov) + device_cov_even = cuda.to_device(cov) + device_cov_first = cuda.to_device(cov_first) + device_μ_Q_m_1 = cuda.to_device(μ_Q_m_1) + device_M_T_m_1 = cuda.to_device(M_T_m_1) device_μ_Q = cuda.to_device(μ_Q) - device_σ_Q = cuda.to_device(σ_Q) + device_σ_Q_inverse = cuda.to_device(σ_Q_inverse) device_Q_subseq_isconstant = cuda.to_device(Q_subseq_isconstant) if ignore_trivial: device_T_B = device_T_A device_M_T = device_μ_Q - device_Σ_T = device_σ_Q + device_Σ_T_inverse = device_σ_Q_inverse device_T_subseq_isconstant = device_Q_subseq_isconstant else: device_T_B = cuda.to_device(T_B) device_M_T = cuda.to_device(M_T) - device_Σ_T = cuda.to_device(Σ_T) + device_Σ_T_inverse = cuda.to_device(Σ_T_inverse) device_T_subseq_isconstant = cuda.to_device(T_subseq_isconstant) profile = np.full((w, k), np.inf, dtype=np.float64) @@ -397,13 +442,15 @@ def _gpu_stump( device_T_A, device_T_B, m, - device_QT_even, - device_QT_odd, - device_QT_first, + device_cov_even, + device_cov_odd, + device_cov_first, + device_μ_Q_m_1, + device_M_T_m_1, device_μ_Q, - device_σ_Q, + device_σ_Q_inverse, device_M_T, - device_Σ_T, + device_Σ_T_inverse, device_Q_subseq_isconstant, device_T_subseq_isconstant, w, @@ -427,13 +474,15 @@ def _gpu_stump( device_T_A, device_T_B, m, - device_QT_even, - device_QT_odd, - device_QT_first, + device_cov_even, + device_cov_odd, + device_cov_first, + device_μ_Q_m_1, + device_M_T_m_1, device_μ_Q, - device_σ_Q, + device_σ_Q_inverse, device_M_T, - device_Σ_T, + device_Σ_T_inverse, device_Q_subseq_isconstant, device_T_subseq_isconstant, w, @@ -651,10 +700,10 @@ def gpu_stump( ignore_trivial = True T_B_subseq_isconstant = T_A_subseq_isconstant - T_A, μ_Q, σ_Q, Q_subseq_isconstant = core.preprocess( + T_A, μ_Q_with_inf, σ_Q, Q_subseq_isconstant = core.preprocess( T_A, m, T_subseq_isconstant=T_A_subseq_isconstant ) - T_B, M_T, Σ_T, T_subseq_isconstant = core.preprocess( + T_B, M_T_with_inf, Σ_T, T_subseq_isconstant = core.preprocess( T_B, m, T_subseq_isconstant=T_B_subseq_isconstant ) @@ -685,12 +734,23 @@ def gpu_stump( np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM) ) # See Definition 3 and Figure 3 + # Precalculate for sliding covariance + M_T, _ = core.compute_mean_std(T_B, m) + μ_Q, _ = core.compute_mean_std(T_A, m) + + M_T_m_1, _ = core.compute_mean_std(T_B, m - 1) + μ_Q_m_1, _ = core.compute_mean_std(T_A, m - 1) + T_A_fname = core.array_to_temp_file(T_A) T_B_fname = core.array_to_temp_file(T_B) - μ_Q_fname = core.array_to_temp_file(μ_Q) - σ_Q_fname = core.array_to_temp_file(σ_Q) - M_T_fname = core.array_to_temp_file(M_T) - Σ_T_fname = core.array_to_temp_file(Σ_T) + μ_Q_m_1_fname = core.array_to_temp_file(μ_Q_m_1) + M_T_m_1_fname = core.array_to_temp_file(M_T_m_1) + μ_Q_fname = core.array_to_temp_file(μ_Q_with_inf) + σ_Q_inverse = 1.0 / np.maximum(σ_Q, config.STUMPY_DENOM_THRESHOLD) + σ_Q_inverse_fname = core.array_to_temp_file(σ_Q_inverse) + M_T_fname = core.array_to_temp_file(M_T_with_inf) + Σ_T_inverse = 1.0 / np.maximum(Σ_T, config.STUMPY_DENOM_THRESHOLD) + Σ_T_inverse_fname = core.array_to_temp_file(Σ_T_inverse) Q_subseq_isconstant_fname = core.array_to_temp_file(Q_subseq_isconstant) T_subseq_isconstant_fname = core.array_to_temp_file(T_subseq_isconstant) @@ -723,17 +783,20 @@ def gpu_stump( pool = mp.Pool(processes=len(device_ids)) results = [None] * len(device_ids) - QT_fnames = [] - QT_first_fnames = [] + cov_fnames = [] + cov_first_fnames = [] for idx, start in enumerate(range(0, l, step)): stop = min(l, start + step) QT, QT_first = core._get_QT(start, T_A, T_B, m) - QT_fname = core.array_to_temp_file(QT) - QT_first_fname = core.array_to_temp_file(QT_first) - QT_fnames.append(QT_fname) - QT_first_fnames.append(QT_first_fname) + cov = (QT / m) - (μ_Q * M_T[start]) + cov_first = (QT_first / m) - (μ_Q[0] * M_T) + + cov_fname = core.array_to_temp_file(cov) + cov_first_fname = core.array_to_temp_file(cov_first) + cov_fnames.append(cov_fname) + cov_first_fnames.append(cov_first_fname) if len(device_ids) > 1 and idx < len(device_ids) - 1: # pragma: no cover # Spawn and execute in child process for multi-GPU request @@ -746,11 +809,13 @@ def gpu_stump( stop, excl_zone, μ_Q_fname, - σ_Q_fname, - QT_fname, - QT_first_fname, + σ_Q_inverse_fname, + cov_fname, + cov_first_fname, + μ_Q_m_1_fname, + M_T_m_1_fname, M_T_fname, - Σ_T_fname, + Σ_T_inverse_fname, Q_subseq_isconstant_fname, T_subseq_isconstant_fname, w, @@ -777,11 +842,13 @@ def gpu_stump( stop, excl_zone, μ_Q_fname, - σ_Q_fname, - QT_fname, - QT_first_fname, + σ_Q_inverse_fname, + cov_fname, + cov_first_fname, + μ_Q_m_1_fname, + M_T_m_1_fname, M_T_fname, - Σ_T_fname, + Σ_T_inverse_fname, Q_subseq_isconstant_fname, T_subseq_isconstant_fname, w, @@ -810,16 +877,18 @@ def gpu_stump( os.remove(T_A_fname) os.remove(T_B_fname) + os.remove(μ_Q_m_1_fname) + os.remove(M_T_m_1_fname) os.remove(μ_Q_fname) - os.remove(σ_Q_fname) + os.remove(σ_Q_inverse_fname) os.remove(M_T_fname) - os.remove(Σ_T_fname) + os.remove(Σ_T_inverse_fname) os.remove(Q_subseq_isconstant_fname) os.remove(T_subseq_isconstant_fname) - for QT_fname in QT_fnames: - os.remove(QT_fname) - for QT_first_fname in QT_first_fnames: - os.remove(QT_first_fname) + for cov_fname in cov_fnames: + os.remove(cov_fname) + for cov_first_fname in cov_first_fnames: + os.remove(cov_first_fname) for idx in range(len(device_ids)): profile_fname = profile[idx]