From 30f6fbf5f831a142555c4506491daecca23c2f8a Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Fri, 22 Mar 2024 19:08:52 +0000 Subject: [PATCH 01/11] feat: make selector polynomials optional (#11) --- halo2_gadgets/benches/poseidon.rs | 2 +- halo2_gadgets/src/poseidon.rs | 2 +- halo2_gadgets/src/poseidon/pow5.rs | 24 ++++++++--------- halo2_gadgets/src/poseidon/primitives.rs | 2 +- halo2_proofs/benches/lookups.rs | 6 ++--- .../examples/simple-lookup-unblinded.rs | 12 ++++----- halo2_proofs/examples/simple-lookup.rs | 12 ++++----- halo2_proofs/src/dev.rs | 2 +- halo2_proofs/src/dev/cost.rs | 2 +- halo2_proofs/src/plonk.rs | 4 +-- halo2_proofs/src/plonk/circuit.rs | 20 ++++++++++---- .../src/plonk/circuit/compress_selectors.rs | 26 ++++++++++++++----- halo2_proofs/src/plonk/keygen.rs | 8 +++--- 13 files changed, 72 insertions(+), 50 deletions(-) diff --git a/halo2_gadgets/benches/poseidon.rs b/halo2_gadgets/benches/poseidon.rs index b6a7c940b4..c81623bcbc 100644 --- a/halo2_gadgets/benches/poseidon.rs +++ b/halo2_gadgets/benches/poseidon.rs @@ -100,7 +100,7 @@ where let message_word = |i: usize| { let value = self.message.map(|message_vals| message_vals[i]); region.assign_advice( - || format!("load message_{}", i), + || format!("load message_{i}"), config.input[i], 0, || value, diff --git a/halo2_gadgets/src/poseidon.rs b/halo2_gadgets/src/poseidon.rs index bfd78f3dee..400e90f671 100644 --- a/halo2_gadgets/src/poseidon.rs +++ b/halo2_gadgets/src/poseidon.rs @@ -288,7 +288,7 @@ impl< .enumerate() { self.sponge - .absorb(layouter.namespace(|| format!("absorb_{}", i)), value)?; + .absorb(layouter.namespace(|| format!("absorb_{i}")), value)?; } self.sponge .finish_absorbing(layouter.namespace(|| "finish absorbing"))? diff --git a/halo2_gadgets/src/poseidon/pow5.rs b/halo2_gadgets/src/poseidon/pow5.rs index 51c1f059ca..e288947179 100644 --- a/halo2_gadgets/src/poseidon/pow5.rs +++ b/halo2_gadgets/src/poseidon/pow5.rs @@ -286,7 +286,7 @@ impl< let mut state = Vec::with_capacity(WIDTH); let mut load_state_word = |i: usize, value: F| -> Result<_, Error> { let var = region.assign_advice_from_constant( - || format!("state_{}", i), + || format!("state_{i}"), config.state[i], 0, value, @@ -325,7 +325,7 @@ impl< initial_state[i] .0 .copy_advice( - || format!("load state_{}", i), + || format!("load state_{i}"), &mut region, config.state[i], 0, @@ -341,7 +341,7 @@ impl< let constraint_var = match input.0[i].clone() { Some(PaddedWord::Message(word)) => word, Some(PaddedWord::Padding(padding_value)) => region.assign_fixed( - || format!("load pad_{}", i), + || format!("load pad_{i}"), config.rc_b[i], 1, || Value::known(padding_value), @@ -350,7 +350,7 @@ impl< }; constraint_var .copy_advice( - || format!("load input_{}", i), + || format!("load input_{i}"), &mut region, config.state[i], 1, @@ -370,7 +370,7 @@ impl< .unwrap_or_else(|| Value::known(F::ZERO)); region .assign_advice( - || format!("load output_{}", i), + || format!("load output_{i}"), config.state[i], 2, || value, @@ -474,7 +474,7 @@ impl Pow5State { }); region.assign_advice( - || format!("round_{} partial_sbox", round), + || format!("round_{round} partial_sbox"), config.partial_sbox, offset, || r.as_ref().map(|r| r[0]), @@ -536,7 +536,7 @@ impl Pow5State { let load_state_word = |i: usize| { initial_state[i] .0 - .copy_advice(|| format!("load state_{}", i), region, config.state[i], 0) + .copy_advice(|| format!("load state_{i}"), region, config.state[i], 0) .map(StateWord) }; @@ -558,7 +558,7 @@ impl Pow5State { // Load the round constants. let mut load_round_constant = |i: usize| { region.assign_fixed( - || format!("round_{} rc_{}", round, i), + || format!("round_{round} rc_{i}"), config.rc_a[i], offset, || Value::known(config.round_constants[round][i]), @@ -574,7 +574,7 @@ impl Pow5State { let next_state_word = |i: usize| { let value = next_state[i]; let var = region.assign_advice( - || format!("round_{} state_{}", next_round, i), + || format!("round_{next_round} state_{i}"), config.state[i], offset + 1, || value, @@ -649,7 +649,7 @@ mod tests { let state_word = |i: usize| { let value = Value::known(Fp::from(i as u64)); let var = region.assign_advice( - || format!("load state_{}", i), + || format!("load state_{i}"), config.state[i], 0, || value, @@ -688,7 +688,7 @@ mod tests { |mut region| { let mut final_state_word = |i: usize| { let var = region.assign_advice( - || format!("load final_state_{}", i), + || format!("load final_state_{i}"), config.state[i], 0, || Value::known(expected_final_state[i]), @@ -774,7 +774,7 @@ mod tests { let message_word = |i: usize| { let value = self.message.map(|message_vals| message_vals[i]); region.assign_advice( - || format!("load message_{}", i), + || format!("load message_{i}"), config.state[i], 0, || value, diff --git a/halo2_gadgets/src/poseidon/primitives.rs b/halo2_gadgets/src/poseidon/primitives.rs index c456c87f54..cc1e123552 100644 --- a/halo2_gadgets/src/poseidon/primitives.rs +++ b/halo2_gadgets/src/poseidon/primitives.rs @@ -310,7 +310,7 @@ impl Domain for Const type Padding = iter::Take>; fn name() -> String { - format!("ConstantLength<{}>", L) + format!("ConstantLength<{L}>") } fn initial_capacity_element() -> F { diff --git a/halo2_proofs/benches/lookups.rs b/halo2_proofs/benches/lookups.rs index 5529a50b86..41d5742d9c 100644 --- a/halo2_proofs/benches/lookups.rs +++ b/halo2_proofs/benches/lookups.rs @@ -114,7 +114,7 @@ fn criterion_benchmark(c: &mut Criterion) { |mut table| { for row in 0u64..(1 << 8) { table.assign_cell( - || format!("row {}", row), + || format!("row {row}"), config.table, row as usize, || Value::known(F::from(row)), @@ -131,7 +131,7 @@ fn criterion_benchmark(c: &mut Criterion) { for offset in 0u64..(1 << 10) { config.selector.enable(&mut region, offset as usize)?; region.assign_advice( - || format!("offset {}", offset), + || format!("offset {offset}"), config.advice, offset as usize, || Value::known(F::from(offset % 256)), @@ -140,7 +140,7 @@ fn criterion_benchmark(c: &mut Criterion) { for offset in 1u64..(1 << 10) { config.selector.enable(&mut region, offset as usize)?; region.assign_advice( - || format!("offset {}", offset), + || format!("offset {offset}"), config.other_advice, offset as usize - 1, || Value::known(F::from(offset % 256)), diff --git a/halo2_proofs/examples/simple-lookup-unblinded.rs b/halo2_proofs/examples/simple-lookup-unblinded.rs index bd9d03ffa6..71c922b855 100644 --- a/halo2_proofs/examples/simple-lookup-unblinded.rs +++ b/halo2_proofs/examples/simple-lookup-unblinded.rs @@ -100,7 +100,7 @@ fn main() { let diff = lhs - rhs; - let constraint = diff.clone() * (Expression::Constant(F::ZERO) - diff.clone()); + let constraint = diff.clone() * (Expression::Constant(F::ZERO) - diff); Constraints::with_selector(s_mul, vec![constraint]) }); @@ -118,14 +118,14 @@ fn main() { |mut table| { for row in 0u64..2_u64.pow(K - 1) { table.assign_cell( - || format!("input row {}", row), + || format!("input row {row}"), config.table_input, row as usize, || Value::known(F::from(row)), )?; // table output (2x the input) -- yeehaw table.assign_cell( - || format!("output row {}", row), + || format!("output row {row}"), config.table_output, row as usize, || Value::known(F::from(2 * row)), @@ -144,14 +144,14 @@ fn main() { config.qlookup.enable(&mut region, offset as usize)?; // input region.assign_advice( - || format!("offset {}", offset), + || format!("offset {offset}"), config.advice, offset as usize, || Value::known(F::from(offset)), )?; // 2x let cell = region.assign_advice( - || format!("offset {}", offset), + || format!("offset {offset}"), config.other_advice, offset as usize, || Value::known(F::from(2 * offset)), @@ -247,7 +247,7 @@ fn main() { env_logger::init(); - println!("k = {}", K); + println!("k = {K}"); // time it println!("keygen"); let start = std::time::Instant::now(); diff --git a/halo2_proofs/examples/simple-lookup.rs b/halo2_proofs/examples/simple-lookup.rs index 6fbea8d56d..aa9ddefc8a 100644 --- a/halo2_proofs/examples/simple-lookup.rs +++ b/halo2_proofs/examples/simple-lookup.rs @@ -100,7 +100,7 @@ fn main() { let diff = lhs - rhs; - let constraint = diff.clone() * (Expression::Constant(F::ZERO) - diff.clone()); + let constraint = diff.clone() * (Expression::Constant(F::ZERO) - diff); Constraints::with_selector(s_mul, vec![constraint]) }); @@ -118,14 +118,14 @@ fn main() { |mut table| { for row in 0u64..2_u64.pow(K - 1) { table.assign_cell( - || format!("input row {}", row), + || format!("input row {row}"), config.table_input, row as usize, || Value::known(F::from(row)), )?; // table output (2x the input) -- yeehaw table.assign_cell( - || format!("output row {}", row), + || format!("output row {row}"), config.table_output, row as usize, || Value::known(F::from(2 * row)), @@ -144,14 +144,14 @@ fn main() { config.qlookup.enable(&mut region, offset as usize)?; // input region.assign_advice( - || format!("offset {}", offset), + || format!("offset {offset}"), config.advice, offset as usize, || Value::known(F::from(offset)), )?; // 2x let cell = region.assign_advice( - || format!("offset {}", offset), + || format!("offset {offset}"), config.other_advice, offset as usize, || Value::known(F::from(2 * offset)), @@ -247,7 +247,7 @@ fn main() { env_logger::init(); - println!("k = {}", K); + println!("k = {K}"); // time it println!("keygen"); let start = std::time::Instant::now(); diff --git a/halo2_proofs/src/dev.rs b/halo2_proofs/src/dev.rs index 4e48adef8f..f06278e429 100644 --- a/halo2_proofs/src/dev.rs +++ b/halo2_proofs/src/dev.rs @@ -711,7 +711,7 @@ impl + Ord> MockProver { )?; } - let (cs, selector_polys) = prover.cs.compress_selectors(prover.selectors.clone()); + let (cs, selector_polys) = prover.cs.compress_selectors(prover.selectors.clone(), true); prover.cs = cs; prover.fixed.extend(selector_polys.into_iter().map(|poly| { let mut v = vec![CellValue::Unassigned; n]; diff --git a/halo2_proofs/src/dev/cost.rs b/halo2_proofs/src/dev/cost.rs index 735f1f0dc7..6c1a0f7d45 100644 --- a/halo2_proofs/src/dev/cost.rs +++ b/halo2_proofs/src/dev/cost.rs @@ -283,7 +283,7 @@ impl> CircuitCost= cs.minimum_rows()); diff --git a/halo2_proofs/src/plonk.rs b/halo2_proofs/src/plonk.rs index 3933552b9d..540c895082 100644 --- a/halo2_proofs/src/plonk.rs +++ b/halo2_proofs/src/plonk.rs @@ -178,12 +178,12 @@ where Ok(selector) }) .collect::>()?; - let (cs, _) = cs.compress_selectors(selectors.clone()); + let (cs, _) = cs.compress_selectors(selectors.clone(), false); (cs, selectors) } else { // we still need to replace selectors with fixed Expressions in `cs` let fake_selectors = vec![vec![]; cs.num_selectors]; - let (cs, _) = cs.directly_convert_selectors_to_fixed(fake_selectors); + let (cs, _) = cs.directly_convert_selectors_to_fixed(fake_selectors, false); (cs, vec![]) }; diff --git a/halo2_proofs/src/plonk/circuit.rs b/halo2_proofs/src/plonk/circuit.rs index 50c0d1ac42..550518be1e 100644 --- a/halo2_proofs/src/plonk/circuit.rs +++ b/halo2_proofs/src/plonk/circuit.rs @@ -2127,7 +2127,11 @@ impl ConstraintSystem { /// find which fixed column corresponds with a given `Selector`. /// /// Do not call this twice. Yes, this should be a builder pattern instead. - pub fn compress_selectors(mut self, selectors: Vec>) -> (Self, Vec>) { + pub fn compress_selectors( + mut self, + selectors: Vec>, + return_polys: bool, + ) -> (Self, Vec>) { // The number of provided selector assignments must be the number we // counted for this constraint system. assert_eq!(selectors.len(), self.num_selectors); @@ -2171,6 +2175,7 @@ impl ConstraintSystem { rotation: Rotation::cur(), }) }, + return_polys, ); let mut selector_map = vec![None; selector_assignment.len()]; @@ -2197,6 +2202,7 @@ impl ConstraintSystem { pub fn directly_convert_selectors_to_fixed( mut self, selectors: Vec>, + return_polys: bool, ) -> (Self, Vec>) { // The number of provided selector assignments must be the number we // counted for this constraint system. @@ -2205,10 +2211,14 @@ impl ConstraintSystem { let (polys, selector_replacements): (Vec<_>, Vec<_>) = selectors .into_iter() .map(|selector| { - let poly = selector - .iter() - .map(|b| if *b { F::ONE } else { F::ZERO }) - .collect::>(); + let poly = if return_polys { + selector + .iter() + .map(|b| if *b { F::ONE } else { F::ZERO }) + .collect::>() + } else { + vec![] + }; let column = self.fixed_column(); let rotation = Rotation::cur(); let expr = Expression::Fixed(FixedQuery { diff --git a/halo2_proofs/src/plonk/circuit/compress_selectors.rs b/halo2_proofs/src/plonk/circuit/compress_selectors.rs index 053ebe3178..7bf917ef73 100644 --- a/halo2_proofs/src/plonk/circuit/compress_selectors.rs +++ b/halo2_proofs/src/plonk/circuit/compress_selectors.rs @@ -52,6 +52,7 @@ pub fn process( mut selectors: Vec, max_degree: usize, mut allocate_fixed_column: E, + return_polys: bool, ) -> (Vec>, Vec>) where E: FnMut() -> Expression, @@ -76,11 +77,16 @@ where // gate constraint. let expression = allocate_fixed_column(); - let combination_assignment = selector - .activations - .iter() - .map(|b| if *b { F::ONE } else { F::ZERO }) - .collect::>(); + let combination_assignment = if return_polys { + selector + .activations + .iter() + .map(|b| if *b { F::ONE } else { F::ZERO }) + .collect::>() + } else { + vec![] + }; + let combination_index = combination_assignments.len(); combination_assignments.push(combination_assignment); selector_assignments.push(SelectorAssignment { @@ -177,7 +183,11 @@ where } // Now, compute the selector and combination assignments. - let mut combination_assignment = vec![F::ZERO; n]; + let mut combination_assignment = if return_polys { + vec![F::ZERO; n] + } else { + vec![] + }; let combination_len = combination.len(); let combination_index = combination_assignments.len(); let query = allocate_fixed_column(); @@ -201,6 +211,7 @@ where } // Update the combination assignment + // if return polys is false the iteration will not run as it is empty for (combination, selector) in combination_assignment .iter_mut() .zip(selector.activations.iter()) @@ -220,6 +231,7 @@ where expression, } })); + // if return polys is false this is empty combination_assignments.push(combination_assignment); } @@ -287,7 +299,7 @@ mod tests { }); query += 1; tmp - }); + }, true); { let mut selectors_seen = vec![]; diff --git a/halo2_proofs/src/plonk/keygen.rs b/halo2_proofs/src/plonk/keygen.rs index 53872f554d..f684def6fe 100644 --- a/halo2_proofs/src/plonk/keygen.rs +++ b/halo2_proofs/src/plonk/keygen.rs @@ -265,11 +265,11 @@ where let mut fixed = batch_invert_assigned(assembly.fixed); let (cs, selector_polys) = if compress_selectors { - cs.compress_selectors(assembly.selectors.clone()) + cs.compress_selectors(assembly.selectors.clone(), true) } else { // After this, the ConstraintSystem should not have any selectors: `verify` does not need them, and `keygen_pk` regenerates `cs` from scratch anyways. let selectors = std::mem::take(&mut assembly.selectors); - cs.directly_convert_selectors_to_fixed(selectors) + cs.directly_convert_selectors_to_fixed(selectors, true) }; fixed.extend( selector_polys @@ -338,9 +338,9 @@ where let mut fixed = batch_invert_assigned(assembly.fixed); let (cs, selector_polys) = if vk.compress_selectors { - cs.compress_selectors(assembly.selectors) + cs.compress_selectors(assembly.selectors, true) } else { - cs.directly_convert_selectors_to_fixed(assembly.selectors) + cs.directly_convert_selectors_to_fixed(assembly.selectors, true) }; fixed.extend( selector_polys From 010eda91ecaa91b9389c8ce2dc92737d1d69d991 Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Wed, 3 Jul 2024 12:45:59 -0400 Subject: [PATCH 02/11] feat: parallel poly read --- halo2_proofs/Cargo.toml | 1 + halo2_proofs/src/poly.rs | 30 +++++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/halo2_proofs/Cargo.toml b/halo2_proofs/Cargo.toml index 0af0b87e65..0d811dd77a 100644 --- a/halo2_proofs/Cargo.toml +++ b/halo2_proofs/Cargo.toml @@ -106,6 +106,7 @@ icicle_gpu = ["icicle", "rustacuda"] mv-lookup = [] cost-estimator = ["serde", "serde_derive"] derive_serde = ["halo2curves/derive_serde"] +parallel-poly-read = [] [lib] bench = false diff --git a/halo2_proofs/src/poly.rs b/halo2_proofs/src/poly.rs index a1fad0637a..753b65eb82 100644 --- a/halo2_proofs/src/poly.rs +++ b/halo2_proofs/src/poly.rs @@ -6,8 +6,9 @@ use crate::arithmetic::parallelize; use crate::helpers::SerdePrimeField; use crate::plonk::Assigned; use crate::SerdeFormat; - use group::ff::{BatchInvert, Field}; +#[cfg(feature = "parallel-poly-read")] +use maybe_rayon::{iter::ParallelIterator, prelude::ParallelSliceMut}; use std::fmt::Debug; use std::io; use std::marker::PhantomData; @@ -161,6 +162,33 @@ impl Polynomial { impl Polynomial { /// Reads polynomial from buffer using `SerdePrimeField::read`. + #[cfg(feature = "parallel-poly-read")] + pub(crate) fn read(reader: &mut R, format: SerdeFormat) -> io::Result { + let mut poly_len = [0u8; 4]; + reader.read_exact(&mut poly_len)?; + let poly_len = u32::from_be_bytes(poly_len) as usize; + + let repr_len = F::default().to_repr().as_ref().len(); + + let mut new_vals = vec![0u8; poly_len * repr_len]; + reader.read_exact(&mut new_vals)?; + + // parallel read + new_vals + .par_chunks_mut(repr_len) + .map(|chunk| { + let mut chunk = io::Cursor::new(chunk); + F::read(&mut chunk, format) + }) + .collect::>>() + .map(|values| Self { + values, + _marker: PhantomData, + }) + } + + /// Reads polynomial from buffer using `SerdePrimeField::read`. + #[cfg(not(feature = "parallel-poly-read"))] pub(crate) fn read(reader: &mut R, format: SerdeFormat) -> io::Result { let mut poly_len = [0u8; 4]; reader.read_exact(&mut poly_len)?; From 4b1ac89add55614be3cd0a933ddc0ae3385b3ddf Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Wed, 3 Jul 2024 15:56:59 -0400 Subject: [PATCH 03/11] fix: non work stealing to improve pk loading on linux --- halo2_proofs/src/poly.rs | 41 ++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/halo2_proofs/src/poly.rs b/halo2_proofs/src/poly.rs index 753b65eb82..73031406fd 100644 --- a/halo2_proofs/src/poly.rs +++ b/halo2_proofs/src/poly.rs @@ -169,22 +169,39 @@ impl Polynomial { let poly_len = u32::from_be_bytes(poly_len) as usize; let repr_len = F::default().to_repr().as_ref().len(); + let total_len = poly_len * repr_len; - let mut new_vals = vec![0u8; poly_len * repr_len]; + let mut new_vals = vec![0u8; total_len]; reader.read_exact(&mut new_vals)?; + let default_parallelism_approx = match std::thread::available_parallelism() { + Ok(parallelism) => usize::from(parallelism), + _ => 1, + }; + + log::debug!( + "[parallel-poly-read] default_parallelism_approx = {}", + default_parallelism_approx + ); + + let num_of_reps_per_chunk = std::cmp::max(1, poly_len / default_parallelism_approx); + let chunk_size = num_of_reps_per_chunk * repr_len; + // parallel read - new_vals - .par_chunks_mut(repr_len) - .map(|chunk| { - let mut chunk = io::Cursor::new(chunk); - F::read(&mut chunk, format) - }) - .collect::>>() - .map(|values| Self { - values, - _marker: PhantomData, - }) + Ok(Self { + values: new_vals + .par_chunks(chunk_size) + .map(|mut chunk| { + let mut felts = Vec::with_capacity(num_of_reps_per_chunk); + while !chunk.is_empty() { + felts.push(F::read(&mut chunk, format).unwrap()); + } + felts + }) + .flatten() + .collect::>(), + _marker: PhantomData, + }) } /// Reads polynomial from buffer using `SerdePrimeField::read`. From 13e0333fe042c3f8b528952c173adc99438249f0 Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Wed, 3 Jul 2024 16:01:09 -0400 Subject: [PATCH 04/11] fix: rayon imports --- halo2_proofs/src/poly.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/halo2_proofs/src/poly.rs b/halo2_proofs/src/poly.rs index 73031406fd..fec65ccda9 100644 --- a/halo2_proofs/src/poly.rs +++ b/halo2_proofs/src/poly.rs @@ -8,7 +8,7 @@ use crate::plonk::Assigned; use crate::SerdeFormat; use group::ff::{BatchInvert, Field}; #[cfg(feature = "parallel-poly-read")] -use maybe_rayon::{iter::ParallelIterator, prelude::ParallelSliceMut}; +use maybe_rayon::{iter::ParallelIterator, prelude::ParallelSlice}; use std::fmt::Debug; use std::io; use std::marker::PhantomData; From cfff1bde1a8a5c70e020b4d55442f35c1511b591 Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Wed, 3 Jul 2024 16:06:23 -0400 Subject: [PATCH 05/11] Revert "fix: non work stealing to improve pk loading on linux" This reverts commit 4b1ac89add55614be3cd0a933ddc0ae3385b3ddf. --- halo2_proofs/src/poly.rs | 41 ++++++++++++---------------------------- 1 file changed, 12 insertions(+), 29 deletions(-) diff --git a/halo2_proofs/src/poly.rs b/halo2_proofs/src/poly.rs index fec65ccda9..db25c9759c 100644 --- a/halo2_proofs/src/poly.rs +++ b/halo2_proofs/src/poly.rs @@ -169,39 +169,22 @@ impl Polynomial { let poly_len = u32::from_be_bytes(poly_len) as usize; let repr_len = F::default().to_repr().as_ref().len(); - let total_len = poly_len * repr_len; - let mut new_vals = vec![0u8; total_len]; + let mut new_vals = vec![0u8; poly_len * repr_len]; reader.read_exact(&mut new_vals)?; - let default_parallelism_approx = match std::thread::available_parallelism() { - Ok(parallelism) => usize::from(parallelism), - _ => 1, - }; - - log::debug!( - "[parallel-poly-read] default_parallelism_approx = {}", - default_parallelism_approx - ); - - let num_of_reps_per_chunk = std::cmp::max(1, poly_len / default_parallelism_approx); - let chunk_size = num_of_reps_per_chunk * repr_len; - // parallel read - Ok(Self { - values: new_vals - .par_chunks(chunk_size) - .map(|mut chunk| { - let mut felts = Vec::with_capacity(num_of_reps_per_chunk); - while !chunk.is_empty() { - felts.push(F::read(&mut chunk, format).unwrap()); - } - felts - }) - .flatten() - .collect::>(), - _marker: PhantomData, - }) + new_vals + .par_chunks_mut(repr_len) + .map(|chunk| { + let mut chunk = io::Cursor::new(chunk); + F::read(&mut chunk, format) + }) + .collect::>>() + .map(|values| Self { + values, + _marker: PhantomData, + }) } /// Reads polynomial from buffer using `SerdePrimeField::read`. From 8cfca221f53069a0374687654882b99e729041d7 Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Wed, 3 Jul 2024 16:06:30 -0400 Subject: [PATCH 06/11] Revert "fix: rayon imports" This reverts commit 13e0333fe042c3f8b528952c173adc99438249f0. --- halo2_proofs/src/poly.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/halo2_proofs/src/poly.rs b/halo2_proofs/src/poly.rs index db25c9759c..753b65eb82 100644 --- a/halo2_proofs/src/poly.rs +++ b/halo2_proofs/src/poly.rs @@ -8,7 +8,7 @@ use crate::plonk::Assigned; use crate::SerdeFormat; use group::ff::{BatchInvert, Field}; #[cfg(feature = "parallel-poly-read")] -use maybe_rayon::{iter::ParallelIterator, prelude::ParallelSlice}; +use maybe_rayon::{iter::ParallelIterator, prelude::ParallelSliceMut}; use std::fmt::Debug; use std::io; use std::marker::PhantomData; From 098ac0ef3b29255e0e2524ecbb4e7e325ae6e7fd Mon Sep 17 00:00:00 2001 From: dante <45801863+alexander-camuto@users.noreply.github.com> Date: Sun, 4 Aug 2024 10:40:44 -0400 Subject: [PATCH 07/11] chore: swap hash function for rustc hasher (#12) --- halo2_proofs/Cargo.toml | 1 + halo2_proofs/benches/commit_zk.rs | 3 ++- halo2_proofs/src/circuit/floor_planner/single_pass.rs | 2 +- halo2_proofs/src/circuit/floor_planner/v1/strategy.rs | 8 +++----- halo2_proofs/src/circuit/layouter.rs | 2 +- halo2_proofs/src/circuit/table_layouter.rs | 7 +++---- halo2_proofs/src/dev.rs | 4 ++-- halo2_proofs/src/dev/cost.rs | 11 ++++++----- halo2_proofs/src/dev/cost_model.rs | 2 +- halo2_proofs/src/dev/failure.rs | 4 +++- halo2_proofs/src/dev/graph/layout.rs | 2 +- halo2_proofs/src/dev/metadata.rs | 7 +++---- halo2_proofs/src/plonk/circuit.rs | 6 ++++-- halo2_proofs/src/plonk/permutation/keygen.rs | 5 ++++- halo2_proofs/src/plonk/prover.rs | 11 ++++++++--- halo2_proofs/src/plonk/vanishing/prover.rs | 4 ++-- 16 files changed, 45 insertions(+), 34 deletions(-) diff --git a/halo2_proofs/Cargo.toml b/halo2_proofs/Cargo.toml index 0d811dd77a..83adc2e834 100644 --- a/halo2_proofs/Cargo.toml +++ b/halo2_proofs/Cargo.toml @@ -61,6 +61,7 @@ rand_chacha = "0.3" maybe-rayon = { version = "0.1.1"} lazy_static = { version = "1", optional = true } env_logger = "0.10.0" +rustc-hash = "2.0.0" # GPU Icicle integration icicle = { git = "https://github.com/ingonyama-zk/icicle.git", branch = "rust/large-bucket-factor-msm", optional = true } diff --git a/halo2_proofs/benches/commit_zk.rs b/halo2_proofs/benches/commit_zk.rs index 9901f686a6..158f526444 100644 --- a/halo2_proofs/benches/commit_zk.rs +++ b/halo2_proofs/benches/commit_zk.rs @@ -8,7 +8,8 @@ use maybe_rayon::current_num_threads; use rand_chacha::rand_core::RngCore; use rand_chacha::ChaCha20Rng; use rand_core::SeedableRng; -use std::{collections::HashMap, iter}; +use rustc_hash::FxHashMap as HashMap; +use std::iter; fn rand_poly_serial(mut rng: ChaCha20Rng, domain: usize) -> Vec { // Sample a random polynomial of degree n - 1 diff --git a/halo2_proofs/src/circuit/floor_planner/single_pass.rs b/halo2_proofs/src/circuit/floor_planner/single_pass.rs index 33c09e4c57..ff02ed02fb 100644 --- a/halo2_proofs/src/circuit/floor_planner/single_pass.rs +++ b/halo2_proofs/src/circuit/floor_planner/single_pass.rs @@ -1,5 +1,5 @@ +use rustc_hash::FxHashMap as HashMap; use std::cmp; -use std::collections::HashMap; use std::fmt; use std::marker::PhantomData; diff --git a/halo2_proofs/src/circuit/floor_planner/v1/strategy.rs b/halo2_proofs/src/circuit/floor_planner/v1/strategy.rs index 71745de245..b594cd942e 100644 --- a/halo2_proofs/src/circuit/floor_planner/v1/strategy.rs +++ b/halo2_proofs/src/circuit/floor_planner/v1/strategy.rs @@ -1,8 +1,6 @@ -use std::{ - cmp, - collections::{BTreeSet, HashMap}, - ops::Range, -}; +use std::{cmp, collections::BTreeSet, ops::Range}; + +use rustc_hash::FxHashMap as HashMap; use super::{RegionColumn, RegionShape}; use crate::{circuit::RegionStart, plonk::Any}; diff --git a/halo2_proofs/src/circuit/layouter.rs b/halo2_proofs/src/circuit/layouter.rs index f939c3fca5..78616d2e2a 100644 --- a/halo2_proofs/src/circuit/layouter.rs +++ b/halo2_proofs/src/circuit/layouter.rs @@ -1,7 +1,7 @@ //! Implementations of common circuit layouters. +use rustc_hash::FxHashSet as HashSet; use std::cmp; -use std::collections::HashSet; use std::fmt; use ff::Field; diff --git a/halo2_proofs/src/circuit/table_layouter.rs b/halo2_proofs/src/circuit/table_layouter.rs index 06338bb896..ebeb381d99 100644 --- a/halo2_proofs/src/circuit/table_layouter.rs +++ b/halo2_proofs/src/circuit/table_layouter.rs @@ -1,9 +1,8 @@ //! Implementations of common table layouters. -use std::{ - collections::HashMap, - fmt::{self, Debug}, -}; +use std::fmt::{self, Debug}; + +use rustc_hash::FxHashMap as HashMap; use ff::Field; diff --git a/halo2_proofs/src/dev.rs b/halo2_proofs/src/dev.rs index f06278e429..066ad43f6c 100644 --- a/halo2_proofs/src/dev.rs +++ b/halo2_proofs/src/dev.rs @@ -1,7 +1,7 @@ //! Tools for developing circuits. -use std::collections::HashMap; -use std::collections::HashSet; +use rustc_hash::FxHashMap as HashMap; +use rustc_hash::FxHashSet as HashSet; use std::iter; use std::ops::{Add, Mul, Neg, Range}; diff --git a/halo2_proofs/src/dev/cost.rs b/halo2_proofs/src/dev/cost.rs index 6c1a0f7d45..e754e44050 100644 --- a/halo2_proofs/src/dev/cost.rs +++ b/halo2_proofs/src/dev/cost.rs @@ -1,13 +1,14 @@ //! Developer tools for investigating the cost of a circuit. use std::{ - cmp, - collections::{HashMap, HashSet}, - iter, + cmp, iter, marker::PhantomData, ops::{Add, Mul}, }; +use rustc_hash::FxHashMap as HashMap; +use rustc_hash::FxHashSet as HashSet; + use ff::{Field, PrimeField}; use group::prime::PrimeGroup; @@ -288,7 +289,7 @@ impl> CircuitCost= cs.minimum_rows()); // Figure out how many point sets we have due to queried cells. - let mut column_queries: HashMap, HashSet> = HashMap::new(); + let mut column_queries: HashMap, HashSet> = HashMap::default(); for (c, r) in iter::empty() .chain( cs.advice_queries @@ -306,7 +307,7 @@ impl> CircuitCost> = HashSet::new(); + let mut point_sets: HashSet> = HashSet::default(); for (_, r) in column_queries { // Sort the query sets so we merge duplicates. let mut query_set: Vec<_> = r.into_iter().collect(); diff --git a/halo2_proofs/src/dev/cost_model.rs b/halo2_proofs/src/dev/cost_model.rs index 51b3a1ad76..4b4d30c304 100644 --- a/halo2_proofs/src/dev/cost_model.rs +++ b/halo2_proofs/src/dev/cost_model.rs @@ -1,7 +1,7 @@ //! The cost estimator takes high-level parameters for a circuit design, and estimates the //! verification cost, as well as resulting proof size. -use std::collections::HashSet; +use rustc_hash::FxHashSet as HashSet; use std::{iter, num::ParseIntError, str::FromStr}; use crate::plonk::Circuit; diff --git a/halo2_proofs/src/dev/failure.rs b/halo2_proofs/src/dev/failure.rs index a532fca1fc..ce8cd5b36f 100644 --- a/halo2_proofs/src/dev/failure.rs +++ b/halo2_proofs/src/dev/failure.rs @@ -1,8 +1,10 @@ -use std::collections::{BTreeMap, HashSet}; +use std::collections::BTreeMap; use std::fmt::{self, Debug}; use group::ff::Field; +use rustc_hash::FxHashSet as HashSet; + use super::metadata::{DebugColumn, DebugVirtualCell}; use super::MockProver; use super::{ diff --git a/halo2_proofs/src/dev/graph/layout.rs b/halo2_proofs/src/dev/graph/layout.rs index 94bd7eea14..46c8dcccff 100644 --- a/halo2_proofs/src/dev/graph/layout.rs +++ b/halo2_proofs/src/dev/graph/layout.rs @@ -3,7 +3,7 @@ use plotters::{ coord::Shift, prelude::{DrawingArea, DrawingAreaErrorKind, DrawingBackend}, }; -use std::collections::HashSet; +use rustc_hash::FxHashSet as HashSet; use std::ops::Range; use crate::{ diff --git a/halo2_proofs/src/dev/metadata.rs b/halo2_proofs/src/dev/metadata.rs index f81bfa67a7..d2db16fa2d 100644 --- a/halo2_proofs/src/dev/metadata.rs +++ b/halo2_proofs/src/dev/metadata.rs @@ -2,10 +2,9 @@ use super::metadata::Column as ColumnMetadata; use crate::plonk::{self, Any}; -use std::{ - collections::HashMap, - fmt::{self, Debug}, -}; +use std::fmt::{self, Debug}; + +use rustc_hash::FxHashMap as HashMap; /// Metadata about a column within a circuit. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Column { diff --git a/halo2_proofs/src/plonk/circuit.rs b/halo2_proofs/src/plonk/circuit.rs index 550518be1e..dc65fdb6aa 100644 --- a/halo2_proofs/src/plonk/circuit.rs +++ b/halo2_proofs/src/plonk/circuit.rs @@ -9,7 +9,7 @@ use core::cmp::max; use core::ops::{Add, Mul}; use ff::Field; use sealed::SealedPhase; -use std::collections::{BTreeMap, HashMap}; +use std::collections::BTreeMap; use std::fmt::Debug; use std::iter::{Product, Sum}; use std::{ @@ -17,6 +17,8 @@ use std::{ ops::{Neg, Sub}, }; +use rustc_hash::FxHashMap as HashMap; + #[cfg(not(feature = "mv-lookup"))] use super::lookup; #[cfg(feature = "mv-lookup")] @@ -1701,7 +1703,7 @@ impl Default for ConstraintSystem { lookups_map: BTreeMap::default(), lookups: Vec::new(), shuffles: Vec::new(), - general_column_annotations: HashMap::new(), + general_column_annotations: HashMap::default(), constants: vec![], minimum_degree: None, } diff --git a/halo2_proofs/src/plonk/permutation/keygen.rs b/halo2_proofs/src/plonk/permutation/keygen.rs index 7c3aec9d8a..2ac6eb8e76 100644 --- a/halo2_proofs/src/plonk/permutation/keygen.rs +++ b/halo2_proofs/src/plonk/permutation/keygen.rs @@ -14,7 +14,10 @@ use crate::{ use maybe_rayon::prelude::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator}; #[cfg(feature = "thread-safe-region")] -use std::collections::{BTreeSet, HashMap}; +use std::collections::BTreeSet; + +#[cfg(feature = "thread-safe-region")] +use rustc_hash::FxHashMap as HashMap; #[cfg(not(feature = "thread-safe-region"))] /// Struct that accumulates all the necessary data in order to construct the permutation argument. diff --git a/halo2_proofs/src/plonk/prover.rs b/halo2_proofs/src/plonk/prover.rs index d058317206..e617311eec 100644 --- a/halo2_proofs/src/plonk/prover.rs +++ b/halo2_proofs/src/plonk/prover.rs @@ -1,9 +1,12 @@ use ff::{Field, FromUniformBytes, WithSmallOrderMulGroup}; use group::Curve; use rand_core::RngCore; -use std::collections::{BTreeSet, HashSet}; +use rustc_hash::FxBuildHasher; +use rustc_hash::FxHashMap as HashMap; +use rustc_hash::FxHashSet as HashSet; +use std::collections::BTreeSet; +use std::iter; use std::ops::RangeTo; -use std::{collections::HashMap, iter}; use super::{ circuit::{ @@ -311,7 +314,9 @@ where }; instances.len() ]; - let mut challenges = HashMap::::with_capacity(meta.num_challenges); + let s = FxBuildHasher::default(); + let mut challenges = + HashMap::::with_capacity_and_hasher(meta.num_challenges, s); let unusable_rows_start = params.n() as usize - (meta.blinding_factors() + 1); for current_phase in pk.vk.cs.phases() { diff --git a/halo2_proofs/src/plonk/vanishing/prover.rs b/halo2_proofs/src/plonk/vanishing/prover.rs index 7943086826..17603e64cd 100644 --- a/halo2_proofs/src/plonk/vanishing/prover.rs +++ b/halo2_proofs/src/plonk/vanishing/prover.rs @@ -1,9 +1,9 @@ -use std::{collections::HashMap, iter}; - use ff::Field; use group::Curve; use rand_chacha::ChaCha20Rng; use rand_core::{RngCore, SeedableRng}; +use rustc_hash::FxHashMap as HashMap; +use std::iter; use super::Argument; use crate::{ From 0d170e3aca5d59db1b16e91c1ef9e224d8e002ad Mon Sep 17 00:00:00 2001 From: emirsoyturk Date: Mon, 26 Aug 2024 12:04:21 +0300 Subject: [PATCH 08/11] icicle v2 integration --- halo2_proofs/Cargo.toml | 14 +- halo2_proofs/benches/fft.rs | 2 +- halo2_proofs/src/arithmetic.rs | 92 +++++++- halo2_proofs/src/dev.rs | 3 +- halo2_proofs/src/icicle.rs | 200 ++++++++---------- halo2_proofs/src/multicore.rs | 4 +- halo2_proofs/src/plonk/evaluation.rs | 8 +- halo2_proofs/src/plonk/prover.rs | 25 +-- halo2_proofs/src/poly/domain.rs | 22 +- halo2_proofs/src/poly/ipa/commitment.rs | 8 +- .../src/poly/ipa/commitment/prover.rs | 10 +- halo2_proofs/src/poly/ipa/msm.rs | 4 +- halo2_proofs/src/poly/ipa/strategy.rs | 4 +- halo2_proofs/src/poly/kzg/commitment.rs | 45 +--- halo2_proofs/src/poly/kzg/msm.rs | 5 +- 15 files changed, 217 insertions(+), 229 deletions(-) diff --git a/halo2_proofs/Cargo.toml b/halo2_proofs/Cargo.toml index 0af0b87e65..3d109fc5c5 100644 --- a/halo2_proofs/Cargo.toml +++ b/halo2_proofs/Cargo.toml @@ -63,7 +63,9 @@ lazy_static = { version = "1", optional = true } env_logger = "0.10.0" # GPU Icicle integration -icicle = { git = "https://github.com/ingonyama-zk/icicle.git", branch = "rust/large-bucket-factor-msm", optional = true } +icicle-core = { git = "https://github.com/ingonyama-zk/icicle", branch="ezkl-icicle2", package="icicle-core", optional = true } +icicle-bn254 = { git = "https://github.com/ingonyama-zk/icicle", branch="ezkl-icicle2", package="icicle-bn254", optional = true } +icicle-cuda-runtime = { git = "https://github.com/ingonyama-zk/icicle", branch="ezkl-icicle2", package="icicle-cuda-runtime", optional = true } rustacuda = { version = "0.1", optional = true } serde = { version = "1", optional = true, features = ["derive"] } serde_derive = { version = "1", optional = true} @@ -90,10 +92,10 @@ getrandom = { version = "0.2", features = ["js"] } default = ["batch", "bits"] dev-graph = ["plotters", "tabbycat"] test-dev-graph = [ - "dev-graph", - "plotters/bitmap_backend", - "plotters/bitmap_encoder", - "plotters/ttf", + "dev-graph", + "plotters/bitmap_backend", + "plotters/bitmap_encoder", + "plotters/ttf" ] bits = ["halo2curves/bits"] gadget-traces = ["backtrace"] @@ -102,7 +104,7 @@ sanity-checks = [] batch = ["rand_core/getrandom"] circuit-params = [] counter = ["lazy_static"] -icicle_gpu = ["icicle", "rustacuda"] +icicle_gpu = ["icicle-cuda-runtime", "icicle-bn254", "icicle-core"] mv-lookup = [] cost-estimator = ["serde", "serde_derive"] derive_serde = ["halo2curves/derive_serde"] diff --git a/halo2_proofs/benches/fft.rs b/halo2_proofs/benches/fft.rs index 0de72a0380..4b5019a520 100644 --- a/halo2_proofs/benches/fft.rs +++ b/halo2_proofs/benches/fft.rs @@ -16,7 +16,7 @@ fn criterion_benchmark(c: &mut Criterion) { let mut a = (0..(1 << k)).map(|_| Fp::random(OsRng)).collect::>(); let omega = Fp::random(OsRng); // would be weird if this mattered b.iter(|| { - best_fft(&mut a, omega, k as u32); + best_fft(&mut a, omega, k as u32, false); }); }); } diff --git a/halo2_proofs/src/arithmetic.rs b/halo2_proofs/src/arithmetic.rs index 02d3330c0a..f65e88ba9a 100644 --- a/halo2_proofs/src/arithmetic.rs +++ b/halo2_proofs/src/arithmetic.rs @@ -1,6 +1,8 @@ //! This module provides common utilities, traits and structures for group, //! field and polynomial arithmetic. +#[cfg(feature = "icicle_gpu")] +use super::icicle; use super::multicore; pub use ff::Field; use group::{ @@ -9,11 +11,6 @@ use group::{ }; pub use halo2curves::{CurveAffine, CurveExt}; -#[cfg(feature = "icicle_gpu")] -use super::icicle; -#[cfg(feature = "icicle_gpu")] -use rustacuda::prelude::DeviceBuffer; - /// This represents an element of a group with basic operations that can be /// performed. This allows an FFT implementation (for example) to operate /// generically over either a field or elliptic curve group. @@ -145,11 +142,75 @@ pub fn small_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::C #[cfg(feature = "icicle_gpu")] /// Performs a multi-exponentiation operation on GPU using Icicle library -pub fn best_multiexp_gpu(coeffs: &[C::Scalar], is_lagrange: bool) -> C::Curve { - let scalars_ptr: DeviceBuffer<::icicle::curves::bn254::ScalarField_BN254> = - icicle::copy_scalars_to_device::(coeffs); +pub fn best_multiexp_gpu(coeffs: &[C::Scalar], g: &[C]) -> C::Curve { + icicle::multiexp_on_device::(coeffs, g) +} + +/// Performs a multi-exponentiation operation +pub fn best_multiexp(coeffs: &[C::Scalar], bases: &[C]) -> C::Curve { + #[cfg(not(feature = "icicle_gpu"))] + { + best_multiexp_cpu(coeffs, bases) + } - return icicle::multiexp_on_device::(scalars_ptr, is_lagrange); + #[cfg(feature = "icicle_gpu")] + if !icicle::should_use_cpu_msm(bases.len()) && icicle::is_gpu_supported_field(&coeffs[0]) { + best_multiexp_gpu(coeffs, bases) + } else { + best_multiexp_cpu(coeffs, bases) + } +} + +/// Performs a FTT operation +pub fn best_ftt + ff::PrimeField>( + scalars: &mut [G], + omega: Scalar, + log_n: u32, +) { + #[cfg(not(feature = "icicle_gpu"))] + { + best_fft(scalars, omega, log_n); + } + + #[cfg(feature = "icicle_gpu")] + { + if !icicle::should_use_cpu_fft(scalars.len()) && icicle::is_gpu_supported_field(&scalars[0]) { + best_fft_gpu(scalars, omega, log_n, false); + } else { + best_fft(scalars, omega, log_n); + } + } +} + +/// Performs a iNTT operation +pub fn best_iftt + ff::PrimeField>( + scalars: &mut [G], + omega: Scalar, + log_n: u32, + divisor: Scalar, +) { + #[cfg(feature = "icicle_gpu")] + { + if !icicle::should_use_cpu_fft(scalars.len()) && icicle::is_gpu_supported_field(&scalars[0]) { + best_fft_gpu(scalars, omega, log_n, true); + } else { + best_fft(scalars, omega, log_n); + parallelize(scalars, |a, _| { + for a in a { + *a *= &divisor; + } + }); + } + } + #[cfg(not(feature = "icicle_gpu"))] + { + best_fft(scalars, omega, log_n); + parallelize(scalars, |a, _| { + for a in a { + *a *= &divisor; + } + }); + } } /// Performs a multi-exponentiation operation. @@ -182,10 +243,22 @@ pub fn best_multiexp_cpu(coeffs: &[C::Scalar], bases: &[C]) -> C } else { let mut acc = C::Curve::identity(); multiexp_serial(coeffs, bases, &mut acc); + acc } } +/// Performs a NTT operation on GPU using Icicle library +#[cfg(feature = "icicle_gpu")] +pub fn best_fft_gpu + ff::PrimeField>( + a: &mut [G], + omega: Scalar, + log_n: u32, + inverse: bool, +) { + icicle::fft_on_device::(a, omega, log_n, inverse); +} + /// Performs a radix-$2$ Fast-Fourier Transformation (FFT) on a vector of size /// $n = 2^k$, when provided `log_n` = $k$ and an element of multiplicative /// order $n$ called `omega` ($\omega$). The result is that the vector `a`, when @@ -226,7 +299,6 @@ pub fn best_fft>(a: &mut [G], omega: Scalar, Some(tw) }) .collect(); - if log_n <= log_threads { let mut chunk = 2_usize; let mut twiddle_chunk = n / 2; diff --git a/halo2_proofs/src/dev.rs b/halo2_proofs/src/dev.rs index f06278e429..ba75769c86 100644 --- a/halo2_proofs/src/dev.rs +++ b/halo2_proofs/src/dev.rs @@ -21,8 +21,7 @@ use crate::{ }; use maybe_rayon::prelude::{ - IndexedParallelIterator, IntoParallelIterator, IntoParallelRefIterator, ParallelIterator, - ParallelSliceMut, + IndexedParallelIterator, IntoParallelIterator, IntoParallelRefIterator, ParallelIterator, ParallelSliceMut }; pub mod metadata; diff --git a/halo2_proofs/src/icicle.rs b/halo2_proofs/src/icicle.rs index cd50af6582..aeac3c5875 100644 --- a/halo2_proofs/src/icicle.rs +++ b/halo2_proofs/src/icicle.rs @@ -1,48 +1,35 @@ use group::ff::PrimeField; -use icicle::{ - curves::bn254::{Point_BN254, ScalarField_BN254}, - test_bn254::commit_bn254, -}; -use std::sync::{Arc, Once}; - -pub use icicle::curves::bn254::PointAffineNoInfinity_BN254; -use rustacuda::memory::CopyDestination; -use rustacuda::prelude::*; - +use icicle_bn254::curve::{CurveCfg, G1Projective, ScalarField}; +use halo2curves::bn256::Fr as Bn256Fr; +use icicle_cuda_runtime::{stream::CudaStream, memory::{DeviceVec, HostSlice}}; +use crate::arithmetic::FftGroup; +use std::any::TypeId; +use std::any::Any; pub use halo2curves::CurveAffine; +use icicle_core::{ + curve::Affine, + msm, + ntt::{get_root_of_unity, initialize_domain, ntt_inplace, NTTConfig, NTTDir}, +}; +use maybe_rayon::iter::IntoParallelRefIterator; +use maybe_rayon::iter::ParallelIterator; use std::{env, mem}; -static mut GPU_CONTEXT: Option = None; -static mut GPU_G: Option> = None; -static mut GPU_G_LAGRANGE: Option> = None; -static GPU_INIT: Once = Once::new(); - pub fn should_use_cpu_msm(size: usize) -> bool { size <= (1 << u8::from_str_radix(&env::var("ICICLE_SMALL_K").unwrap_or("8".to_string()), 10).unwrap()) } -pub fn init_gpu(g: &[C], g_lagrange: &[C]) { - unsafe { - GPU_INIT.call_once(|| { - GPU_CONTEXT = Some(rustacuda::quick_init().unwrap()); - GPU_G = Some(copy_points_to_device(g)); - GPU_G_LAGRANGE = Some(copy_points_to_device(g_lagrange)); - }); - } +pub fn should_use_cpu_fft(size: usize) -> bool { + size <= (1 + << u8::from_str_radix(&env::var("ICICLE_SMALL_K_FFT").unwrap_or("8".to_string()), 10).unwrap()) } -fn u32_from_u8(u8_arr: &[u8; 32]) -> [u32; 8] { - let mut t = [0u32; 8]; - for i in 0..8 { - t[i] = u32::from_le_bytes([ - u8_arr[4 * i], - u8_arr[4 * i + 1], - u8_arr[4 * i + 2], - u8_arr[4 * i + 3], - ]); +pub fn is_gpu_supported_field(_sample_element: &G) -> bool { + match TypeId::of::() { + id if id == TypeId::of::() => true, + _ => false, } - return t; } fn repr_from_u32(u32_arr: &[u32; 8]) -> ::Base { @@ -51,96 +38,93 @@ fn repr_from_u32(u32_arr: &[u32; 8]) -> ::Base return PrimeField::from_repr(t[0]).unwrap(); } -fn is_infinity_point(point: Point_BN254) -> bool { - let inf_point = Point_BN254::infinity(); - point.z.s.eq(&inf_point.z.s) +fn icicle_scalars_from_c_scalars(coeffs: &[G]) -> Vec { + coeffs.par_iter().map(|coef| { + let repr: [u32; 8] = unsafe { mem::transmute_copy(&coef.to_repr()) }; + ScalarField::from(repr) + }).collect() } -fn icicle_scalars_from_c(coeffs: &[C::Scalar]) -> Vec { - let _coeffs = [Arc::new( - coeffs.iter().map(|x| x.to_repr()).collect::>(), - )]; - - let _coeffs: &Arc> = unsafe { mem::transmute(&_coeffs) }; - _coeffs - .iter() - .map(|x| ScalarField_BN254::from_limbs(x)) - .collect::>() +fn c_scalars_from_icicle_scalars(scalars: &[ScalarField]) -> Vec { + scalars.par_iter().map(|scalar| { + let repr: G::Repr = unsafe { mem::transmute_copy(scalar) }; + G::from_repr(repr).unwrap() + }).collect() } -pub fn copy_scalars_to_device( - coeffs: &[C::Scalar], -) -> DeviceBuffer { - let scalars = icicle_scalars_from_c::(coeffs); - - DeviceBuffer::from_slice(scalars.as_slice()).unwrap() -} +fn icicle_points_from_c(bases: &[C]) -> Vec> { + bases.par_iter().map(|p| { + let coordinates = p.coordinates().unwrap(); + let x_repr: [u32; 8] = unsafe { mem::transmute_copy(&coordinates.x().to_repr()) }; + let y_repr: [u32; 8] = unsafe { mem::transmute_copy(&coordinates.y().to_repr()) }; -fn icicle_points_from_c(bases: &[C]) -> Vec { - let _bases = [Arc::new( - bases - .iter() - .map(|p| { - let coordinates = p.coordinates().unwrap(); - [coordinates.x().to_repr(), coordinates.y().to_repr()] - }) - .collect::>(), - )]; - - let _bases: &Arc> = unsafe { mem::transmute(&_bases) }; - _bases - .iter() - .map(|x| { - let tx = u32_from_u8(&x[0]); - let ty = u32_from_u8(&x[1]); - PointAffineNoInfinity_BN254::from_limbs(&tx, &ty) - }) - .collect::>() + Affine::::from_limbs(x_repr, y_repr) + }).collect() } -pub fn copy_points_to_device( - bases: &[C], -) -> DeviceBuffer { - let points = icicle_points_from_c(bases); +fn c_from_icicle_point(point: &G1Projective) -> C::Curve { + let (x, y) = { + let affine: Affine = Affine::::from(*point); - DeviceBuffer::from_slice(points.as_slice()).unwrap() -} - -fn c_from_icicle_point(commit_res: Point_BN254) -> C::Curve { - let (x, y) = if is_infinity_point(commit_res) { ( - repr_from_u32::(&[0u32; 8]), - repr_from_u32::(&[0u32; 8]), - ) - } else { - let affine_res_from_cuda = commit_res.to_affine(); - ( - repr_from_u32::(&affine_res_from_cuda.x.s), - repr_from_u32::(&affine_res_from_cuda.y.s), + repr_from_u32::(&affine.x.into()), + repr_from_u32::(&affine.y.into()), ) }; - let affine = C::from_xy(x, y).unwrap(); - return affine.to_curve(); + let affine = C::from_xy(x, y); + + return affine.unwrap().to_curve(); } -pub fn multiexp_on_device( - mut coeffs: DeviceBuffer, - is_lagrange: bool, -) -> C::Curve { - let base_ptr: &mut DeviceBuffer; - unsafe { - if is_lagrange { - base_ptr = GPU_G_LAGRANGE.as_mut().unwrap(); - } else { - base_ptr = GPU_G.as_mut().unwrap(); - }; - } +pub fn multiexp_on_device(coeffs: &[C::Scalar], bases: &[C]) -> C::Curve { + let binding = icicle_scalars_from_c_scalars::(coeffs); + let coeffs = HostSlice::from_slice(&binding[..]); + let binding = icicle_points_from_c(bases); + let bases = HostSlice::from_slice(&binding[..]); + + let mut msm_results = DeviceVec::::cuda_malloc(1).unwrap(); + let mut cfg = msm::MSMConfig::default(); + let stream = CudaStream::create().unwrap(); + cfg.ctx.stream = &stream; + cfg.is_async = true; + cfg.large_bucket_factor = 10; + cfg.c = 16; - let d_commit_result = commit_bn254(base_ptr, &mut coeffs, 10); + msm::msm(coeffs, bases, &cfg, &mut msm_results[..]).unwrap(); + stream.synchronize().unwrap(); - let mut h_commit_result = Point_BN254::zero(); - d_commit_result.copy_to(&mut h_commit_result).unwrap(); + let mut msm_host_result = vec![G1Projective::zero(); 1]; + msm_results + .copy_to_host(HostSlice::from_mut_slice(&mut msm_host_result[..])) + .unwrap(); - c_from_icicle_point::(h_commit_result) + let msm_point = c_from_icicle_point::(&msm_host_result[0]); + + msm_point +} +pub fn fft_on_device + ff::PrimeField>( + scalars: &mut [G], + _omega: Scalar, + log_n: u32, + inverse: bool +) { + let size: usize = 1 << log_n; + let mut cfg = NTTConfig::<'_, ScalarField>::default(); + cfg.is_async = false; + + let icicle_omega = get_root_of_unity::((size as u64) * 10); + initialize_domain(icicle_omega, &cfg.ctx, true).unwrap(); + + let mut icicle_scalars: Vec = icicle_scalars_from_c_scalars(scalars); + let host_scalars = HostSlice::from_mut_slice(&mut icicle_scalars); + + ntt_inplace::( + host_scalars, + if inverse { NTTDir::kInverse } else { NTTDir::kForward }, + &cfg, + ).unwrap(); + + let c_scalars = &c_scalars_from_icicle_scalars::(&mut host_scalars.as_slice())[..]; + scalars.copy_from_slice(&c_scalars); } diff --git a/halo2_proofs/src/multicore.rs b/halo2_proofs/src/multicore.rs index 8be2bfba9d..b85e823f5b 100644 --- a/halo2_proofs/src/multicore.rs +++ b/halo2_proofs/src/multicore.rs @@ -1,9 +1,7 @@ pub use maybe_rayon::{ current_num_threads, - iter::{IntoParallelIterator, IntoParallelRefMutIterator, ParallelIterator}, + iter::{IntoParallelIterator, ParallelIterator}, join, scope, - slice::ParallelSliceMut, - Scope, }; pub trait TryFoldAndReduce { diff --git a/halo2_proofs/src/plonk/evaluation.rs b/halo2_proofs/src/plonk/evaluation.rs index afd1fd3ea6..0e8cdae9ce 100644 --- a/halo2_proofs/src/plonk/evaluation.rs +++ b/halo2_proofs/src/plonk/evaluation.rs @@ -386,7 +386,6 @@ impl Evaluator { let l_last = &pk.l_last; let l_active_row = &pk.l_active_row; let p = &pk.vk.cs.permutation; - // Calculate the advice and instance cosets let advice: Vec>> = advice_polys .iter() @@ -448,7 +447,7 @@ impl Evaluator { }); // Permutations - let sets = &permutation.sets; + let sets: &_ = &permutation.sets; if !sets.is_empty() { let blinding_factors = pk.vk.cs.blinding_factors(); let last_rotation = Rotation(-((blinding_factors + 1) as i32)); @@ -595,7 +594,6 @@ impl Evaluator { // they are actually needed. let phi_coset = pk.vk.domain.coeff_to_extended(lookup.phi_poly.clone()); let m_coset = pk.vk.domain.coeff_to_extended(lookup.m_poly.clone()); - // Lookup constraints /* φ_i(X) = f_i(X) + α @@ -644,7 +642,6 @@ impl Evaluator { let inputs_prod: C::Scalar = inputs_value .iter() .fold(C::Scalar::ONE, |acc, input| acc * input); - // f_i(X) + α at ω^idx let fi_inverses = &inputs_inv_sum[n][idx]; let inputs_inv_sum = fi_inverses @@ -669,7 +666,6 @@ impl Evaluator { ); let r_next = get_rotation_idx(idx, 1, rot_scale, isize); - let lhs = { // τ(X) * Π(φ_i(X)) * (ϕ(gX) - ϕ(X)) table_value * inputs_prod * (phi_coset[r_next] - phi_coset[idx]) @@ -1099,4 +1095,4 @@ pub fn evaluate( } }); values -} +} \ No newline at end of file diff --git a/halo2_proofs/src/plonk/prover.rs b/halo2_proofs/src/plonk/prover.rs index d058317206..c5f3ab84f6 100644 --- a/halo2_proofs/src/plonk/prover.rs +++ b/halo2_proofs/src/plonk/prover.rs @@ -4,7 +4,6 @@ use rand_core::RngCore; use std::collections::{BTreeSet, HashSet}; use std::ops::RangeTo; use std::{collections::HashMap, iter}; - use super::{ circuit::{ sealed::{self}, @@ -96,13 +95,12 @@ where pub instance_values: Vec>, pub instance_polys: Vec>, } - let instance: Vec> = instances .iter() .map(|instance| -> Result, Error> { let instance_values = instance - .iter() - .map(|values| { + .iter() + .map(|values| { let mut poly = domain.empty_lagrange(); assert_eq!(poly.len(), params.n() as usize); if values.len() > (poly.len() - (meta.blinding_factors() + 1)) { @@ -117,7 +115,6 @@ where Ok(poly) }) .collect::, _>>()?; - if P::QUERY_INSTANCE { let instance_commitments_projective: Vec<_> = instance_values .iter() @@ -144,14 +141,12 @@ where domain.lagrange_to_coeff(lagrange_vec) }) .collect(); - Ok(InstanceSingle { instance_values, instance_polys, }) }) .collect::, _>>()?; - #[derive(Clone)] struct AdviceSingle { pub advice_polys: Vec>, @@ -327,7 +322,6 @@ where } }) .collect::>(); - for ((circuit, advice), instances) in circuits.iter().zip(advice.iter_mut()).zip(instances) { @@ -368,7 +362,6 @@ where }) .collect(), ); - // Add blinding factors to advice columns for (column_index, advice_values) in column_indices.iter().zip(&mut advice_values) { if !witness.unblinded_advice.contains(column_index) { @@ -381,7 +374,6 @@ where } } } - // Compute commitments to advice column polynomials let blinds: Vec<_> = column_indices .iter() @@ -417,7 +409,6 @@ where advice.advice_blinds[*column_index] = blind; } } - for (index, phase) in meta.challenge_phase.iter().enumerate() { if current_phase == *phase { let existing = @@ -435,9 +426,9 @@ where (advice, challenges) }; + // Sample theta challenge for keeping lookup columns linearly independent let theta: ChallengeTheta<_> = transcript.squeeze_challenge_scalar(); - #[cfg(feature = "mv-lookup")] let lookups: Vec>> = instance .iter() @@ -496,10 +487,10 @@ where // Sample beta challenge let beta: ChallengeBeta<_> = transcript.squeeze_challenge_scalar(); - // Sample gamma challenge let gamma: ChallengeGamma<_> = transcript.squeeze_challenge_scalar(); + // Commit to permutations. let permutations: Vec> = instance .iter() @@ -571,13 +562,11 @@ where .collect::, _>>() }) .collect::, _>>()?; - // Commit to the vanishing argument's random polynomial for blinding h(x_3) let vanishing = vanishing::Argument::commit(params, domain, &mut rng, transcript)?; // Obtain challenge for keeping all separate gates linearly independent let y: ChallengeY<_> = transcript.squeeze_challenge_scalar(); - // Calculate the advice polys let advice: Vec> = advice .into_iter() @@ -596,7 +585,6 @@ where }, ) .collect(); - // Evaluate the h(X) polynomial let h_poly = pk.ev.evaluate_h( pk, @@ -617,10 +605,8 @@ where &shuffles, &permutations, ); - // Construct the vanishing argument's h(X) commitments let vanishing = vanishing.construct(params, domain, h_poly, &mut rng, transcript)?; - let x: ChallengeX<_> = transcript.squeeze_challenge_scalar(); let xn = x.pow([params.n()]); @@ -665,7 +651,6 @@ where transcript.write_scalar(*eval)?; } } - // Compute and hash fixed evals (shared across all circuit instances) let fixed_evals: Vec<_> = meta .fixed_queries @@ -674,7 +659,6 @@ where eval_polynomial(&pk.fixed_polys[column.index()], domain.rotate_omega(*x, at)) }) .collect(); - // Hash each fixed column evaluation for eval in fixed_evals.iter() { transcript.write_scalar(*eval)?; @@ -713,6 +697,7 @@ where }) .collect::, _>>()?; + let instances = instance .iter() .zip(advice.iter()) diff --git a/halo2_proofs/src/poly/domain.rs b/halo2_proofs/src/poly/domain.rs index 3bf12643c6..e80a39eee9 100644 --- a/halo2_proofs/src/poly/domain.rs +++ b/halo2_proofs/src/poly/domain.rs @@ -2,12 +2,12 @@ //! domain that is of a suitable size for the application. use crate::{ - arithmetic::{best_fft, parallelize}, + arithmetic::{best_iftt, best_ftt, parallelize}, plonk::Assigned, }; use super::{Coeff, ExtendedLagrangeCoeff, LagrangeCoeff, Polynomial, Rotation}; -use ff::WithSmallOrderMulGroup; +use ff::{PrimeField, WithSmallOrderMulGroup}; use group::ff::{BatchInvert, Field}; use std::marker::PhantomData; @@ -16,7 +16,7 @@ use std::marker::PhantomData; /// performing operations on an evaluation domain of size $2^k$ and an extended /// domain of size $2^{k} * j$ with $j \neq 0$. #[derive(Clone, Debug)] -pub struct EvaluationDomain { +pub struct EvaluationDomain { n: u64, k: u32, extended_k: u32, @@ -235,7 +235,7 @@ impl> EvaluationDomain { assert_eq!(a.values.len(), 1 << self.k); // Perform inverse FFT to obtain the polynomial in coefficient form - Self::ifft(&mut a.values, self.omega_inv, self.k, self.ifft_divisor); + best_iftt(&mut a.values, self.omega_inv, self.k, self.ifft_divisor); Polynomial { values: a.values, @@ -253,7 +253,7 @@ impl> EvaluationDomain { self.distribute_powers_zeta(&mut a.values, true); a.values.resize(self.extended_len(), F::ZERO); - best_fft(&mut a.values, self.extended_omega, self.extended_k); + best_ftt(&mut a.values, self.extended_omega, self.extended_k); Polynomial { values: a.values, @@ -290,7 +290,7 @@ impl> EvaluationDomain { assert_eq!(a.values.len(), self.extended_len()); // Inverse FFT - Self::ifft( + best_iftt( &mut a.values, self.extended_omega_inv, self.extended_k, @@ -358,16 +358,6 @@ impl> EvaluationDomain { }); } - fn ifft(a: &mut [F], omega_inv: F, log_n: u32, divisor: F) { - best_fft(a, omega_inv, log_n); - parallelize(a, |a, _| { - for a in a { - // Finish iFFT - *a *= &divisor; - } - }); - } - /// Get the size of the domain pub fn k(&self) -> u32 { self.k diff --git a/halo2_proofs/src/poly/ipa/commitment.rs b/halo2_proofs/src/poly/ipa/commitment.rs index 96c98d5fbc..9d8644eb97 100644 --- a/halo2_proofs/src/poly/ipa/commitment.rs +++ b/halo2_proofs/src/poly/ipa/commitment.rs @@ -3,7 +3,7 @@ //! //! [halo]: https://eprint.iacr.org/2019/1021 -use crate::arithmetic::{best_multiexp_cpu, g_to_lagrange, parallelize, CurveAffine, CurveExt}; +use crate::arithmetic::{best_multiexp, g_to_lagrange, parallelize, CurveAffine, CurveExt}; use crate::helpers::CurveRead; use crate::poly::commitment::{Blind, CommitmentScheme, Params, ParamsProver, ParamsVerifier}; use crate::poly::ipa::msm::MSMIPA; @@ -99,7 +99,7 @@ impl<'params, C: CurveAffine> Params<'params, C> for ParamsIPA { tmp_bases.extend(self.g_lagrange.iter()); tmp_bases.push(self.w); - best_multiexp_cpu::(&tmp_scalars, &tmp_bases) + best_multiexp::(&tmp_scalars, &tmp_bases) } /// Writes params to a buffer. @@ -219,7 +219,7 @@ impl<'params, C: CurveAffine> ParamsProver<'params, C> for ParamsIPA { tmp_bases.extend(self.g.iter()); tmp_bases.push(self.w); - best_multiexp_cpu::(&tmp_scalars, &tmp_bases) + best_multiexp::(&tmp_scalars, &tmp_bases) } fn get_g(&self) -> &[C] { @@ -289,7 +289,7 @@ mod test { #[test] fn test_opening_proof() { - const K: u32 = 6; + const K: u32 = 8; use ff::Field; use rand_core::OsRng; diff --git a/halo2_proofs/src/poly/ipa/commitment/prover.rs b/halo2_proofs/src/poly/ipa/commitment/prover.rs index ab3b895fb8..b5e4633ffb 100644 --- a/halo2_proofs/src/poly/ipa/commitment/prover.rs +++ b/halo2_proofs/src/poly/ipa/commitment/prover.rs @@ -3,7 +3,7 @@ use rand_core::RngCore; use super::ParamsIPA; use crate::arithmetic::{ - best_multiexp_cpu, compute_inner_product, eval_polynomial, parallelize, CurveAffine, + best_multiexp, compute_inner_product, eval_polynomial, parallelize, CurveAffine, }; use crate::poly::commitment::ParamsProver; @@ -106,16 +106,16 @@ pub fn create_proof< // // TODO: If we modify multiexp to take "extra" bases, we could speed // this piece up a bit by combining the multiexps. - let l_j = best_multiexp_cpu(&p_prime[half..], &g_prime[0..half]); - let r_j = best_multiexp_cpu(&p_prime[0..half], &g_prime[half..]); + let l_j = best_multiexp(&p_prime[half..], &g_prime[0..half]); + let r_j = best_multiexp(&p_prime[0..half], &g_prime[half..]); let value_l_j = compute_inner_product(&p_prime[half..], &b[0..half]); let value_r_j = compute_inner_product(&p_prime[0..half], &b[half..]); let l_j_randomness = C::Scalar::random(&mut rng); let r_j_randomness = C::Scalar::random(&mut rng); let l_j = - l_j + &best_multiexp_cpu(&[value_l_j * &z, l_j_randomness], &[params.u, params.w]); + l_j + &best_multiexp(&[value_l_j * &z, l_j_randomness], &[params.u, params.w]); let r_j = - r_j + &best_multiexp_cpu(&[value_r_j * &z, r_j_randomness], &[params.u, params.w]); + r_j + &best_multiexp(&[value_r_j * &z, r_j_randomness], &[params.u, params.w]); let l_j = l_j.to_affine(); let r_j = r_j.to_affine(); diff --git a/halo2_proofs/src/poly/ipa/msm.rs b/halo2_proofs/src/poly/ipa/msm.rs index 27259703ed..a615ddce49 100644 --- a/halo2_proofs/src/poly/ipa/msm.rs +++ b/halo2_proofs/src/poly/ipa/msm.rs @@ -1,4 +1,4 @@ -use crate::arithmetic::{best_multiexp_cpu, CurveAffine}; +use crate::arithmetic::{best_multiexp, CurveAffine}; use crate::poly::{commitment::MSM, ipa::commitment::ParamsVerifierIPA}; use ff::Field; use group::Group; @@ -166,7 +166,7 @@ impl<'a, C: CurveAffine> MSM for MSMIPA<'a, C> { assert_eq!(scalars.len(), len); - best_multiexp_cpu(&scalars, &bases) + best_multiexp(&scalars, &bases) } fn bases(&self) -> Vec { diff --git a/halo2_proofs/src/poly/ipa/strategy.rs b/halo2_proofs/src/poly/ipa/strategy.rs index 82f727f06a..d2d1b3d364 100644 --- a/halo2_proofs/src/poly/ipa/strategy.rs +++ b/halo2_proofs/src/poly/ipa/strategy.rs @@ -2,7 +2,7 @@ use super::commitment::{IPACommitmentScheme, ParamsIPA}; use super::msm::MSMIPA; use super::multiopen::VerifierIPA; use crate::{ - arithmetic::best_multiexp_cpu, + arithmetic::best_multiexp, plonk::Error, poly::{ commitment::MSM, @@ -67,7 +67,7 @@ impl<'params, C: CurveAffine> GuardIPA<'params, C> { pub fn compute_g(&self) -> C { let s = compute_s(&self.u, C::Scalar::ONE); - best_multiexp_cpu(&s, &self.msm.params.g).to_affine() + best_multiexp(&s, &self.msm.params.g).to_affine() } } diff --git a/halo2_proofs/src/poly/kzg/commitment.rs b/halo2_proofs/src/poly/kzg/commitment.rs index 926b7adda6..aef0e3d211 100644 --- a/halo2_proofs/src/poly/kzg/commitment.rs +++ b/halo2_proofs/src/poly/kzg/commitment.rs @@ -1,11 +1,4 @@ -use crate::arithmetic::{best_multiexp_cpu, g_to_lagrange, parallelize}; - -#[cfg(feature = "icicle_gpu")] -use crate::arithmetic::best_multiexp_gpu; -#[cfg(feature = "icicle_gpu")] -use crate::icicle; -#[cfg(feature = "icicle_gpu")] -use std::env; +use crate::arithmetic::{best_multiexp, g_to_lagrange, parallelize}; use crate::helpers::SerdeCurveAffine; use crate::poly::commitment::{Blind, CommitmentScheme, Params, ParamsProver, ParamsVerifier}; @@ -125,11 +118,6 @@ where g_lagrange }; - #[cfg(feature = "icicle_gpu")] - if env::var("ENABLE_ICICLE_GPU").is_ok() { - icicle::init_gpu::(&g, &g_lagrange); - } - let g2 = ::generator(); let s_g2 = (g2 * s).into(); @@ -164,11 +152,6 @@ where None => g_to_lagrange(g.iter().map(PrimeCurveAffine::to_curve).collect(), k), }; - #[cfg(feature = "icicle_gpu")] - if env::var("ENABLE_ICICLE_GPU").is_ok() { - icicle::init_gpu::(&g, &g_lagrange); - } - Self { k, n: 1 << k, @@ -279,11 +262,6 @@ where } }; - #[cfg(feature = "icicle_gpu")] - if env::var("ENABLE_ICICLE_GPU").is_ok() { - icicle::init_gpu::(&g, &g_lagrange); - } - let g2 = E::G2Affine::read(reader, format)?; let s_g2 = E::G2Affine::read(reader, format)?; @@ -340,15 +318,7 @@ where let size = scalars.len(); assert!(bases.len() >= size); - #[cfg(feature = "icicle_gpu")] - if env::var("ENABLE_ICICLE_GPU").is_ok() && !icicle::should_use_cpu_msm(size) { - best_multiexp_gpu::(&scalars, true) - } else { - best_multiexp_cpu(&scalars, &bases[0..size]) - } - - #[cfg(not(feature = "icicle_gpu"))] - best_multiexp_cpu(&scalars, &bases[0..size]) + best_multiexp(&scalars, &bases[0..size]) } /// Writes params to a buffer. @@ -392,16 +362,7 @@ where let bases = &self.g; let size = scalars.len(); assert!(bases.len() >= size); - - #[cfg(feature = "icicle_gpu")] - if env::var("ENABLE_ICICLE_GPU").is_ok() && !icicle::should_use_cpu_msm(size) { - best_multiexp_gpu::(&scalars, false) - } else { - best_multiexp_cpu(&scalars, &bases[0..size]) - } - - #[cfg(not(feature = "icicle_gpu"))] - best_multiexp_cpu(&scalars, &bases[0..size]) + best_multiexp(&scalars, &bases[0..size]) } fn get_g(&self) -> &[E::G1Affine] { diff --git a/halo2_proofs/src/poly/kzg/msm.rs b/halo2_proofs/src/poly/kzg/msm.rs index ae17bb1e62..27b7ec1d12 100644 --- a/halo2_proofs/src/poly/kzg/msm.rs +++ b/halo2_proofs/src/poly/kzg/msm.rs @@ -2,7 +2,7 @@ use std::fmt::Debug; use super::commitment::ParamsKZG; use crate::{ - arithmetic::{best_multiexp_cpu, parallelize}, + arithmetic::{best_multiexp, parallelize}, poly::commitment::MSM, }; use group::{Curve, Group}; @@ -81,7 +81,8 @@ where use group::prime::PrimeCurveAffine; let mut bases = vec![E::G1Affine::identity(); self.scalars.len()]; E::G1::batch_normalize(&self.bases, &mut bases); - best_multiexp_cpu(&self.scalars, &bases) + + best_multiexp(&self.scalars, &bases) } fn bases(&self) -> Vec { From 8499853998bdaa4052bef5879d877a7bc1f8ac5d Mon Sep 17 00:00:00 2001 From: emirsoyturk Date: Mon, 26 Aug 2024 12:17:40 +0300 Subject: [PATCH 09/11] remove whitespace and fix typo --- halo2_proofs/benches/fft.rs | 2 +- halo2_proofs/src/arithmetic.rs | 1 + halo2_proofs/src/dev.rs | 3 ++- halo2_proofs/src/plonk/evaluation.rs | 7 +++++-- halo2_proofs/src/plonk/prover.rs | 19 ++++++++++++++----- halo2_proofs/src/poly/ipa/commitment.rs | 2 +- 6 files changed, 24 insertions(+), 10 deletions(-) diff --git a/halo2_proofs/benches/fft.rs b/halo2_proofs/benches/fft.rs index 4b5019a520..0de72a0380 100644 --- a/halo2_proofs/benches/fft.rs +++ b/halo2_proofs/benches/fft.rs @@ -16,7 +16,7 @@ fn criterion_benchmark(c: &mut Criterion) { let mut a = (0..(1 << k)).map(|_| Fp::random(OsRng)).collect::>(); let omega = Fp::random(OsRng); // would be weird if this mattered b.iter(|| { - best_fft(&mut a, omega, k as u32, false); + best_fft(&mut a, omega, k as u32); }); }); } diff --git a/halo2_proofs/src/arithmetic.rs b/halo2_proofs/src/arithmetic.rs index f65e88ba9a..ee0494fddd 100644 --- a/halo2_proofs/src/arithmetic.rs +++ b/halo2_proofs/src/arithmetic.rs @@ -299,6 +299,7 @@ pub fn best_fft>(a: &mut [G], omega: Scalar, Some(tw) }) .collect(); + if log_n <= log_threads { let mut chunk = 2_usize; let mut twiddle_chunk = n / 2; diff --git a/halo2_proofs/src/dev.rs b/halo2_proofs/src/dev.rs index ba75769c86..f06278e429 100644 --- a/halo2_proofs/src/dev.rs +++ b/halo2_proofs/src/dev.rs @@ -21,7 +21,8 @@ use crate::{ }; use maybe_rayon::prelude::{ - IndexedParallelIterator, IntoParallelIterator, IntoParallelRefIterator, ParallelIterator, ParallelSliceMut + IndexedParallelIterator, IntoParallelIterator, IntoParallelRefIterator, ParallelIterator, + ParallelSliceMut, }; pub mod metadata; diff --git a/halo2_proofs/src/plonk/evaluation.rs b/halo2_proofs/src/plonk/evaluation.rs index 0e8cdae9ce..ed8378adc8 100644 --- a/halo2_proofs/src/plonk/evaluation.rs +++ b/halo2_proofs/src/plonk/evaluation.rs @@ -447,7 +447,7 @@ impl Evaluator { }); // Permutations - let sets: &_ = &permutation.sets; + let sets = &permutation.sets; if !sets.is_empty() { let blinding_factors = pk.vk.cs.blinding_factors(); let last_rotation = Rotation(-((blinding_factors + 1) as i32)); @@ -594,6 +594,7 @@ impl Evaluator { // they are actually needed. let phi_coset = pk.vk.domain.coeff_to_extended(lookup.phi_poly.clone()); let m_coset = pk.vk.domain.coeff_to_extended(lookup.m_poly.clone()); + // Lookup constraints /* φ_i(X) = f_i(X) + α @@ -642,6 +643,7 @@ impl Evaluator { let inputs_prod: C::Scalar = inputs_value .iter() .fold(C::Scalar::ONE, |acc, input| acc * input); + // f_i(X) + α at ω^idx let fi_inverses = &inputs_inv_sum[n][idx]; let inputs_inv_sum = fi_inverses @@ -666,6 +668,7 @@ impl Evaluator { ); let r_next = get_rotation_idx(idx, 1, rot_scale, isize); + let lhs = { // τ(X) * Π(φ_i(X)) * (ϕ(gX) - ϕ(X)) table_value * inputs_prod * (phi_coset[r_next] - phi_coset[idx]) @@ -1095,4 +1098,4 @@ pub fn evaluate( } }); values -} \ No newline at end of file +} diff --git a/halo2_proofs/src/plonk/prover.rs b/halo2_proofs/src/plonk/prover.rs index c5f3ab84f6..866dee5555 100644 --- a/halo2_proofs/src/plonk/prover.rs +++ b/halo2_proofs/src/plonk/prover.rs @@ -4,6 +4,7 @@ use rand_core::RngCore; use std::collections::{BTreeSet, HashSet}; use std::ops::RangeTo; use std::{collections::HashMap, iter}; + use super::{ circuit::{ sealed::{self}, @@ -99,8 +100,8 @@ where .iter() .map(|instance| -> Result, Error> { let instance_values = instance - .iter() - .map(|values| { + .iter() + .map(|values| { let mut poly = domain.empty_lagrange(); assert_eq!(poly.len(), params.n() as usize); if values.len() > (poly.len() - (meta.blinding_factors() + 1)) { @@ -115,6 +116,7 @@ where Ok(poly) }) .collect::, _>>()?; + if P::QUERY_INSTANCE { let instance_commitments_projective: Vec<_> = instance_values .iter() @@ -141,12 +143,14 @@ where domain.lagrange_to_coeff(lagrange_vec) }) .collect(); + Ok(InstanceSingle { instance_values, instance_polys, }) }) .collect::, _>>()?; + #[derive(Clone)] struct AdviceSingle { pub advice_polys: Vec>, @@ -322,6 +326,7 @@ where } }) .collect::>(); + for ((circuit, advice), instances) in circuits.iter().zip(advice.iter_mut()).zip(instances) { @@ -362,6 +367,7 @@ where }) .collect(), ); + // Add blinding factors to advice columns for (column_index, advice_values) in column_indices.iter().zip(&mut advice_values) { if !witness.unblinded_advice.contains(column_index) { @@ -374,6 +380,7 @@ where } } } + // Compute commitments to advice column polynomials let blinds: Vec<_> = column_indices .iter() @@ -426,9 +433,9 @@ where (advice, challenges) }; - // Sample theta challenge for keeping lookup columns linearly independent let theta: ChallengeTheta<_> = transcript.squeeze_challenge_scalar(); + #[cfg(feature = "mv-lookup")] let lookups: Vec>> = instance .iter() @@ -490,7 +497,6 @@ where // Sample gamma challenge let gamma: ChallengeGamma<_> = transcript.squeeze_challenge_scalar(); - // Commit to permutations. let permutations: Vec> = instance .iter() @@ -562,11 +568,13 @@ where .collect::, _>>() }) .collect::, _>>()?; + // Commit to the vanishing argument's random polynomial for blinding h(x_3) let vanishing = vanishing::Argument::commit(params, domain, &mut rng, transcript)?; // Obtain challenge for keeping all separate gates linearly independent let y: ChallengeY<_> = transcript.squeeze_challenge_scalar(); + // Calculate the advice polys let advice: Vec> = advice .into_iter() @@ -585,6 +593,7 @@ where }, ) .collect(); + // Evaluate the h(X) polynomial let h_poly = pk.ev.evaluate_h( pk, @@ -605,6 +614,7 @@ where &shuffles, &permutations, ); + // Construct the vanishing argument's h(X) commitments let vanishing = vanishing.construct(params, domain, h_poly, &mut rng, transcript)?; let x: ChallengeX<_> = transcript.squeeze_challenge_scalar(); @@ -697,7 +707,6 @@ where }) .collect::, _>>()?; - let instances = instance .iter() .zip(advice.iter()) diff --git a/halo2_proofs/src/poly/ipa/commitment.rs b/halo2_proofs/src/poly/ipa/commitment.rs index 9d8644eb97..7be053c49c 100644 --- a/halo2_proofs/src/poly/ipa/commitment.rs +++ b/halo2_proofs/src/poly/ipa/commitment.rs @@ -289,7 +289,7 @@ mod test { #[test] fn test_opening_proof() { - const K: u32 = 8; + const K: u32 = 6; use ff::Field; use rand_core::OsRng; From 14983b4cf70be91e3a2a04421631941cbdff7e31 Mon Sep 17 00:00:00 2001 From: emirsoyturk Date: Mon, 26 Aug 2024 12:20:15 +0300 Subject: [PATCH 10/11] remove whitespace --- halo2_proofs/src/plonk/evaluation.rs | 3 ++- halo2_proofs/src/plonk/prover.rs | 10 ++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/halo2_proofs/src/plonk/evaluation.rs b/halo2_proofs/src/plonk/evaluation.rs index ed8378adc8..afd1fd3ea6 100644 --- a/halo2_proofs/src/plonk/evaluation.rs +++ b/halo2_proofs/src/plonk/evaluation.rs @@ -386,6 +386,7 @@ impl Evaluator { let l_last = &pk.l_last; let l_active_row = &pk.l_active_row; let p = &pk.vk.cs.permutation; + // Calculate the advice and instance cosets let advice: Vec>> = advice_polys .iter() @@ -594,7 +595,7 @@ impl Evaluator { // they are actually needed. let phi_coset = pk.vk.domain.coeff_to_extended(lookup.phi_poly.clone()); let m_coset = pk.vk.domain.coeff_to_extended(lookup.m_poly.clone()); - + // Lookup constraints /* φ_i(X) = f_i(X) + α diff --git a/halo2_proofs/src/plonk/prover.rs b/halo2_proofs/src/plonk/prover.rs index 866dee5555..d8d2946bdc 100644 --- a/halo2_proofs/src/plonk/prover.rs +++ b/halo2_proofs/src/plonk/prover.rs @@ -96,6 +96,7 @@ where pub instance_values: Vec>, pub instance_polys: Vec>, } + let instance: Vec> = instances .iter() .map(|instance| -> Result, Error> { @@ -143,7 +144,7 @@ where domain.lagrange_to_coeff(lagrange_vec) }) .collect(); - + Ok(InstanceSingle { instance_values, instance_polys, @@ -416,6 +417,7 @@ where advice.advice_blinds[*column_index] = blind; } } + for (index, phase) in meta.challenge_phase.iter().enumerate() { if current_phase == *phase { let existing = @@ -494,6 +496,7 @@ where // Sample beta challenge let beta: ChallengeBeta<_> = transcript.squeeze_challenge_scalar(); + // Sample gamma challenge let gamma: ChallengeGamma<_> = transcript.squeeze_challenge_scalar(); @@ -614,9 +617,10 @@ where &shuffles, &permutations, ); - + // Construct the vanishing argument's h(X) commitments let vanishing = vanishing.construct(params, domain, h_poly, &mut rng, transcript)?; + let x: ChallengeX<_> = transcript.squeeze_challenge_scalar(); let xn = x.pow([params.n()]); @@ -661,6 +665,7 @@ where transcript.write_scalar(*eval)?; } } + // Compute and hash fixed evals (shared across all circuit instances) let fixed_evals: Vec<_> = meta .fixed_queries @@ -669,6 +674,7 @@ where eval_polynomial(&pk.fixed_polys[column.index()], domain.rotate_omega(*x, at)) }) .collect(); + // Hash each fixed column evaluation for eval in fixed_evals.iter() { transcript.write_scalar(*eval)?; From 99c2d0c80b923f449c51bb438e6f724dcb437976 Mon Sep 17 00:00:00 2001 From: emirsoyturk Date: Mon, 26 Aug 2024 12:21:15 +0300 Subject: [PATCH 11/11] remove whitespace --- halo2_proofs/src/plonk/prover.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/halo2_proofs/src/plonk/prover.rs b/halo2_proofs/src/plonk/prover.rs index d8d2946bdc..d058317206 100644 --- a/halo2_proofs/src/plonk/prover.rs +++ b/halo2_proofs/src/plonk/prover.rs @@ -96,7 +96,7 @@ where pub instance_values: Vec>, pub instance_polys: Vec>, } - + let instance: Vec> = instances .iter() .map(|instance| -> Result, Error> {