From e7d8a2b174be44a82f634536bf4a6bf2f4bde4cf Mon Sep 17 00:00:00 2001
From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com>
Date: Tue, 15 Aug 2023 14:05:20 -0600
Subject: [PATCH 1/3] feat: `multiexp_serial` skips doubling when all bits are
 zero (#10)

* feat: `multiexp_serial` skips doubling when all bits are zero

For each `c`-bit "segment", we extract the bits of every coefficient in
`coeffs` ahead of time and record the highest segment in which any
coefficient has nonzero bits. All segments above that one are skipped.

Before this, we always looked at all 256 bits and performed the
doublings for every segment regardless. Now, if all the coefficients in
the MSM fit in 64 bits, we detect this first and only run Pippenger up
to 64 bits. This only saves doublings, because the Pippenger buckets for
addition were already created dynamically.

* feat: handle case where all bits are 0
---
 halo2_proofs/src/arithmetic.rs | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)
diff --git a/halo2_proofs/src/arithmetic.rs b/halo2_proofs/src/arithmetic.rs
index 7366fb7b19..161f37569f 100644
--- a/halo2_proofs/src/arithmetic.rs
+++ b/halo2_proofs/src/arithmetic.rs
@@ -1,6 +1,8 @@
 //! This module provides common utilities, traits and structures for group,
 //! field and polynomial arithmetic.
 
+use std::cmp;
+
 use super::multicore;
 pub use ff::Field;
 use group::{
@@ -25,6 +27,7 @@ where
 {
 }
 
+// ASSUMES C::Scalar::Repr is little endian
 fn multiexp_serial<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C], acc: &mut C::Curve) {
     let coeffs: Vec<_> = coeffs.iter().map(|a| a.to_repr()).collect();
 
@@ -36,6 +39,7 @@ fn multiexp_serial<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C], acc: &mut
         (f64::from(bases.len() as u32)).ln().ceil() as usize
     };
 
+    // Group `bytes` into bits and take the `segment`th chunk of `c` bits
     fn get_at<F: PrimeField>(segment: usize, c: usize, bytes: &F::Repr) -> usize {
         let skip_bits = segment * c;
         let skip_bytes = skip_bits / 8;
@@ -58,7 +62,32 @@ fn multiexp_serial<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C], acc: &mut
 
     let segments = (256 / c) + 1;
 
-    for current_segment in (0..segments).rev() {
+    // this can be optimized
+    let mut coeffs_in_segments = Vec::with_capacity(segments);
+    // track what is the last segment where we actually have nonzero bits, so we completely skip buckets where the scalar bits for all coeffs are 0
+    let mut max_nonzero_segment = None;
+    for current_segment in 0..segments {
+        let coeff_segments: Vec<_> = coeffs
+            .iter()
+            .map(|coeff| {
+                let c_bits = get_at::<C::Scalar>(current_segment, c, coeff);
+                if c_bits != 0 {
+                    max_nonzero_segment = Some(current_segment);
+                }
+                c_bits
+            })
+            .collect();
+        coeffs_in_segments.push(coeff_segments);
+    }
+
+    if max_nonzero_segment.is_none() {
+        return;
+    }
+    for coeffs_seg in coeffs_in_segments
+        .into_iter()
+        .take(max_nonzero_segment.unwrap() + 1)
+        .rev()
+    {
         for _ in 0..c {
             *acc = acc.double();
         }
@@ -96,8 +125,7 @@ fn multiexp_serial<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C], acc: &mut
 
         let mut buckets: Vec<Bucket<C>> = vec![Bucket::None; (1 << c) - 1];
 
-        for (coeff, base) in coeffs.iter().zip(bases.iter()) {
-            let coeff = get_at::<C::Scalar>(current_segment, c, coeff);
+        for (coeff, base) in coeffs_seg.into_iter().zip(bases.iter()) {
             if coeff != 0 {
                 buckets[coeff - 1].add_assign(base);
             }

From c0857984d0d318eaba594be06e9179166f7fe8ca Mon Sep 17 00:00:00 2001
From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com>
Date: Fri, 1 Sep 2023 15:37:46 -0700
Subject: [PATCH 2/3] feat(msm): start running sum from `max_bits`

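The summation by parts at the end of each Pippenger segment was
unnecessarily adding the identity for the unused buckets above the
largest bucket index seen in that segment. Track that index as
`max_bits` and start the running sum from there.

Also compute the number of segments from `C::Scalar::NUM_BITS` instead
of the hard-coded 256.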
---
 halo2_proofs/src/arithmetic.rs | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/halo2_proofs/src/arithmetic.rs b/halo2_proofs/src/arithmetic.rs
index 161f37569f..9c7242a36a 100644
--- a/halo2_proofs/src/arithmetic.rs
+++ b/halo2_proofs/src/arithmetic.rs
@@ -60,7 +60,7 @@ fn multiexp_serial<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C], acc: &mut
         tmp as usize
     }
 
-    let segments = (256 / c) + 1;
+    let segments = (C::Scalar::NUM_BITS as usize + c - 1) / c;
 
     // this can be optimized
     let mut coeffs_in_segments = Vec::with_capacity(segments);
@@ -125,8 +125,10 @@ fn multiexp_serial<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C], acc: &mut
 
         let mut buckets: Vec<Bucket<C>> = vec![Bucket::None; (1 << c) - 1];
 
+        let mut max_bits = 0;
         for (coeff, base) in coeffs_seg.into_iter().zip(bases.iter()) {
             if coeff != 0 {
+                max_bits = cmp::max(max_bits, coeff);
                 buckets[coeff - 1].add_assign(base);
             }
         }
@@ -136,7 +138,7 @@ fn multiexp_serial<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C], acc: &mut
         //                    (a) + b +
         //                    ((a) + b) + c
         let mut running_sum = C::Curve::identity();
-        for exp in buckets.into_iter().rev() {
+        for exp in buckets.into_iter().take(max_bits).rev() {
             running_sum = exp.add(running_sum);
             *acc = *acc + &running_sum;
         }

From d3e3ba3ae5f2c06c3012ce087385ef6b5f7d1462 Mon Sep 17 00:00:00 2001
From: Jonathan Wang <31040440+jonathanpwang@users.noreply.github.com>
Date: Fri, 1 Sep 2023 15:41:18 -0700
Subject: [PATCH 3/3] chore: add comment

---
 halo2_proofs/src/arithmetic.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/halo2_proofs/src/arithmetic.rs b/halo2_proofs/src/arithmetic.rs
index 9c7242a36a..59e881f926 100644
--- a/halo2_proofs/src/arithmetic.rs
+++ b/halo2_proofs/src/arithmetic.rs
@@ -60,6 +60,7 @@ fn multiexp_serial<C: CurveAffine>(coeffs: &[C::Scalar], bases: &[C], acc: &mut
         tmp as usize
     }
 
+    // Ideally `segments` should be calculated from the max number of bits among all scalars. But this requires a scan of all scalars, so we don't implement it for now.
     let segments = (C::Scalar::NUM_BITS as usize + c - 1) / c;
 
     // this can be optimized