Merkle tree for custom predicate batches (#471)

Resolve https://github.com/0xPARC/pod2/issues/466

Batches are now identified by the root of a Merkle tree that contains all of their predicates (using sequential indices as keys). This means that a custom predicate reference is still identified by a hash + index, but the hash is calculated differently.
The MainPod circuit is no longer limited by the number of batches but by the number of custom predicates; for each custom predicate we verify a Merkle proof to check its batch id.

I've removed a bunch of tests from lang that were testing splitting into multiple batches because there's no longer any need for that.  In a future PR we'll remove the code that handles batch splitting.

Each custom predicate needs 148.2 gates (which is very close to my estimate of 142.7 in https://github.com/0xPARC/pod2/issues/466#issuecomment-3823531286 where I actually made a mistake and considered 5 predicates per batch instead of 4 in the previous Params).
This commit is contained in:
Eduard S. 2026-02-04 11:12:32 +01:00 committed by GitHub
parent a7a30176a7
commit 641d8dabdd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 331 additions and 761 deletions

View file

@ -531,8 +531,6 @@ fn build_external_statement_map(input_pods: &[MainPod]) -> HashMap<Statement, Ha
#[cfg(test)]
mod tests {
use hex::ToHex;
use super::*;
use crate::{
backends::plonky2::{
@ -1374,108 +1372,6 @@ mod tests {
Ok(())
}
#[test]
fn test_batch_cardinality_forces_multi_pod() -> Result<()> {
    // Checks that the solver honours the per-POD cap on custom predicate
    // batches (max_custom_predicate_batches, C7).
    //
    // Scenario:
    // - the cap is set to 2 batches per POD
    // - four batches are parsed separately, each holding one simple predicate
    // - one custom operation is added per batch (three private, one public)
    //
    // Outcome asserted below: 4 batches / 2 per POD = exactly 2 PODs.
    let limits = Params {
        max_custom_predicate_batches: 2, // deliberately tight so the work has to be split
        max_statements: 48,
        max_public_statements: 8,
        max_input_pods: 10,
        max_input_pods_public_statements: 20,
        ..Params::default()
    };
    let mock_vd_set = &*MOCK_VD_SET;

    // Parse four independent podlang sources; every source yields one batch
    // whose single predicate wraps a Contains statement.
    let parsed_one =
        parse(r#"pred1(A) = AND(Contains(A, "x", 1))"#, &limits, &[]).expect("parse batch1");
    let batch_one = parsed_one
        .first_batch()
        .expect("parse batch1 should have a batch");

    let parsed_two =
        parse(r#"pred2(A) = AND(Contains(A, "x", 2))"#, &limits, &[]).expect("parse batch2");
    let batch_two = parsed_two
        .first_batch()
        .expect("parse batch2 should have a batch");

    let parsed_three =
        parse(r#"pred3(A) = AND(Contains(A, "x", 3))"#, &limits, &[]).expect("parse batch3");
    let batch_three = parsed_three
        .first_batch()
        .expect("parse batch3 should have a batch");

    let parsed_four =
        parse(r#"pred4(A) = AND(Contains(A, "x", 4))"#, &limits, &[]).expect("parse batch4");
    let batch_four = parsed_four
        .first_batch()
        .expect("parse batch4 should have a batch");

    let mut pod_builder = MultiPodBuilder::new(&limits, mock_vd_set);

    // For each batch: first produce the Contains statement the predicate
    // takes as its argument, then apply the custom predicate to it.
    let dict_one = dict!({"x" => 1});
    let contains_one = pod_builder.priv_op(FrontendOp::dict_contains(dict_one, "x", 1))?;
    pod_builder.priv_op(FrontendOp::custom(
        batch_one.predicate_ref_by_name("pred1").unwrap(),
        [contains_one],
    ))?;

    let dict_two = dict!({"x" => 2});
    let contains_two = pod_builder.priv_op(FrontendOp::dict_contains(dict_two, "x", 2))?;
    pod_builder.priv_op(FrontendOp::custom(
        batch_two.predicate_ref_by_name("pred2").unwrap(),
        [contains_two],
    ))?;

    let dict_three = dict!({"x" => 3});
    let contains_three = pod_builder.priv_op(FrontendOp::dict_contains(dict_three, "x", 3))?;
    pod_builder.priv_op(FrontendOp::custom(
        batch_three.predicate_ref_by_name("pred3").unwrap(),
        [contains_three],
    ))?;

    // The last custom statement is the only public one.
    let dict_four = dict!({"x" => 4});
    let contains_four = pod_builder.priv_op(FrontendOp::dict_contains(dict_four, "x", 4))?;
    pod_builder.pub_op(FrontendOp::custom(
        batch_four.predicate_ref_by_name("pred4").unwrap(),
        [contains_four],
    ))?;

    let solved = pod_builder.solve()?;

    // Read the POD count once; it feeds both the assertion and the later
    // comparison against the number of proven PODs.
    let pod_count = solved.solution().pod_count;
    assert_eq!(
        pod_count, 2,
        "Expected exactly 2 PODs for 4 batches with max_custom_predicate_batches=2, got {}",
        pod_count
    );

    // Prove with the mock prover and verify every resulting POD.
    let mock_prover = MockProver {};
    let proven = solved.prove(&mock_prover)?;
    assert_eq!(proven.pods.len(), pod_count);

    for (idx, entry) in proven.pods.iter().enumerate() {
        entry.pod.verify().map_err(|err| {
            Error::Frontend(format!("POD {} verification failed: {}", idx, err))
        })?;
    }

    Ok(())
}
#[test]
fn test_long_dependency_chain_spans_multiple_pods() -> Result<()> {
// Verifies that a long dependency chain correctly cascades through multiple
@ -1717,115 +1613,4 @@ mod tests {
Ok(())
}
#[test]
fn test_dependency_chain_with_batch_limit() -> Result<()> {
    // Checks that a dependency chain is still resolved when the batch
    // cardinality limit is in force at the same time.
    //
    // Scenario: pred_a and pred_b live in DIFFERENT batches and pred_b takes
    // a pred_a statement as its argument. With
    // max_custom_predicate_batches = 1 the two predicates are expected to be
    // placed in different PODs, so the dependency has to be satisfied across
    // PODs.
    let limits = Params {
        max_custom_predicate_batches: 1, // a POD may use one batch only
        max_custom_predicate_verifications: 10,
        max_statements: 10,
        max_public_statements: 4,
        max_input_pods: 4,
        max_input_pods_public_statements: 20,
        ..Params::default()
    };
    let mock_vd_set = &*MOCK_VD_SET;

    // The two batches are parsed separately so that they get distinct ids.
    let parsed_upstream =
        parse(r#"pred_a(X) = AND(Contains(X, "k", 1))"#, &limits, &[]).expect("parse batch_a");
    let upstream_batch = parsed_upstream
        .first_batch()
        .expect("parse batch_a should have a batch");

    // pred_b consumes pred_a statements; the external predicate is pulled in
    // with the "use batch" syntax, addressed by the hex-encoded batch id.
    let batch_a_id = upstream_batch.id().encode_hex::<String>();
    let downstream_src = format!(
        r#"
use batch pred_a from 0x{batch_a_id}
pred_b(X) = AND(pred_a(X))
"#
    );
    let parsed_downstream = parse(
        &downstream_src,
        &limits,
        std::slice::from_ref(upstream_batch),
    )
    .expect("parse batch_b");
    let downstream_batch = parsed_downstream
        .first_batch()
        .expect("parse batch_b should have a batch");

    let mut pod_builder = MultiPodBuilder::new(&limits, mock_vd_set);

    // Statement 0: Contains (not tied to any batch)
    let key_dict = dict!({"k" => 1});
    let contains_stmt = pod_builder.priv_op(FrontendOp::dict_contains(key_dict, "k", 1))?;

    // Statement 1: pred_a (batch A)
    let pred_a_stmt = pod_builder.priv_op(FrontendOp::custom(
        upstream_batch.predicate_ref_by_name("pred_a").unwrap(),
        [contains_stmt],
    ))?;

    // Statement 2: pred_b (batch B), which depends on statement 1.
    // With max_custom_predicate_batches = 1 it MUST end up in another POD.
    let _pred_b_stmt = pod_builder.pub_op(FrontendOp::custom(
        downstream_batch.predicate_ref_by_name("pred_b").unwrap(),
        [pred_a_stmt],
    ))?;

    let solved = pod_builder.solve()?;
    let plan = solved.solution();

    // Expected layout: exactly 2 PODs because of the batch limit
    // - POD 0: contains(0) and a_out(1) using batch_a; a_out is public
    // - POD 1 (output): b_out(2) using batch_b; b_out is public
    //
    // Even though max_priv_statements=6 could fit all 3 statements,
    // max_custom_predicate_batches=1 forces batch_a and batch_b apart.
    assert_eq!(
        plan.pod_count, 2,
        "Expected exactly 2 PODs due to batch limit (max_custom_predicate_batches=1)"
    );

    // POD 0 holds contains(0) and a_out(1), with a_out exposed publicly.
    let first_pod = &plan.pod_statements[0];
    assert!(
        first_pod.contains(&0) && first_pod.contains(&1),
        "POD 0 should contain statements 0 and 1, got {:?}",
        first_pod
    );
    assert!(
        plan.pod_public_statements[0].contains(&1),
        "Statement 1 (a_out) should be public in POD 0"
    );

    // POD 1 (the output POD) holds b_out(2), exposed publicly.
    let output_pod = &plan.pod_statements[1];
    assert!(
        output_pod.contains(&2),
        "POD 1 should contain statement 2 (b_out), got {:?}",
        output_pod
    );
    assert!(
        plan.pod_public_statements[1].contains(&2),
        "Statement 2 (b_out) should be public in output POD"
    );

    // Prove with the mock prover and verify every resulting POD.
    let mock_prover = MockProver {};
    let proven = solved.prove(&mock_prover)?;

    for (idx, entry) in proven.pods.iter().enumerate() {
        entry.pod.verify().map_err(|err| {
            Error::Frontend(format!("POD {} verification failed: {}", idx, err))
        })?;
    }

    Ok(())
}
}