Create multiple PODs where resource limits for a single POD are exceeded (#444)

* Create multiple PODs where resource limits for a single POD are exceeded

* HashSet -> BTreeSet determinism fix

* Fixed incorrect assignment of input PODs and added test

* Ensure only a single output POD

* Return error when reveal() called with unknown statement

* Use unreachable! for presumed-impossible cases

* Use assert_eq! rather than debug_assert_eq

* Use FIFO for topological sort

* Simplify bounds calculation

* Some more simplifications/comments

* Enforce dep_idx < idx invariant

* Incrementally solve rather than estimating slack

* Fix tests to correctly test dependencies between private and public statements

* More tidying

* Note possible optimisation of MainPodBuilder cloning of input PODs

* Fix tracking of total input POD count

* Refactor tests

* Formatting

* Small optimisation: use Vec in place of BTreeSet

* Account for automatically-inserted Contains statements

* Formatting

* Fix possible issue with copied statements

* Simplify result type given only a single result MainPod

* Remove unnecessary POD count estimate functionality

* Simplify dependency ordering and tracking

* Remove notion of multiple output PODs from solver

* Minor simplifications

* Use add_constraint instead of with

* Remove unnecessary check following assertion

* Fix handling of anchored keys given that Contains statements are not auto-inserted if they already exist

* Fix confusing dependency graph test

* Remove prove_order

* Fix deduplication and possible double-counting of public but not copied statements

* Reorder so that the output POD is the final POD

* Add more detailed tests

* Remove redundant tests

* Simplify POD counting

* More docs

* Flag more branches as unreachable

* Formatting

* Fix for changed custom batch parsing
This commit is contained in:
Rob Knight 2026-01-28 07:44:04 +01:00 committed by GitHub
parent d1b7b4d37e
commit 48aa004ae5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 3047 additions and 0 deletions

View file

@ -41,6 +41,7 @@ serde_bytes = "0.11"
serde_arrays = "0.2.0" serde_arrays = "0.2.0"
sha2 = { version = "0.10.9" } sha2 = { version = "0.10.9" }
rand_chacha = "0.3.1" rand_chacha = "0.3.1"
good_lp = { version = "1.8", default-features = false, features = ["microlp"] }
# Uncomment for debugging with https://github.com/ed255/plonky2/ at branch `feat/debug`. The repo directory needs to be checked out next to the pod2 repo directory. # Uncomment for debugging with https://github.com/ed255/plonky2/ at branch `feat/debug`. The repo directory needs to be checked out next to the pod2 repo directory.
# [patch."https://github.com/0xPARC/plonky2"] # [patch."https://github.com/0xPARC/plonky2"]

View file

@ -21,11 +21,15 @@ use crate::middleware::{
mod custom; mod custom;
mod error; mod error;
mod multi_pod;
mod operation; mod operation;
mod pod_request; mod pod_request;
mod serialization; mod serialization;
pub use custom::*; pub use custom::*;
pub use error::*; pub use error::*;
pub use multi_pod::{
MultiPodBuilder, MultiPodResult, MultiPodSolution, Options as MultiPodOptions,
};
pub use operation::*; pub use operation::*;
pub use pod_request::*; pub use pod_request::*;

View file

@ -0,0 +1,224 @@
//! Resource cost analysis for statements and operations.
//!
//! This module provides cost analysis for multi-POD packing. Each operation
//! consumes various resources that have per-POD limits.
use std::collections::BTreeSet;
use crate::{
frontend::{Operation, OperationArg},
middleware::{
CustomPredicateBatch, Hash, NativeOperation, OperationType, RawValue, Statement, ValueRef,
},
};
/// Unique identifier for a custom predicate batch.
///
/// Uses the batch's cryptographic hash as identifier. Two batches with the same
/// hash are considered identical for resource counting purposes.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct CustomBatchId(pub Hash);
impl From<&CustomPredicateBatch> for CustomBatchId {
fn from(batch: &CustomPredicateBatch) -> Self {
Self(batch.id())
}
}
/// Unique identifier for an anchored key (dict, key) pair.
///
/// When a Contains statement is used as an argument to operations like gt(), eq(), etc.,
/// the value is accessed via an "anchored key" - a reference to a specific key in a
/// specific dictionary. Each unique anchored key used in a POD requires a Contains
/// statement to be present in that POD (auto-inserted by MainPodBuilder if needed).
///
/// We use the raw values of the dict and key for comparison, as they uniquely identify
/// the anchored key regardless of the specific Value types involved.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct AnchoredKeyId {
/// The dictionary root value (raw representation for Ord).
pub dict: RawValue,
/// The key within the dictionary (raw representation for Ord).
pub key: RawValue,
}
impl AnchoredKeyId {
/// Create a new anchored key ID from raw values.
pub fn new(dict: RawValue, key: RawValue) -> Self {
Self { dict, key }
}
/// Try to extract an anchored key ID from a Contains statement with all literal values.
pub fn from_contains_statement(stmt: &Statement) -> Option<Self> {
if let Statement::Contains(
ValueRef::Literal(dict),
ValueRef::Literal(key),
ValueRef::Literal(_value),
) = stmt
{
Some(Self::new(dict.raw(), key.raw()))
} else {
None
}
}
}
/// Resource costs for a single statement/operation.
///
/// Each field corresponds to a resource with a per-POD limit in `Params`.
#[derive(Clone, Debug, Default)]
pub struct StatementCost {
/// Number of merkle proofs used (for Contains/NotContains).
/// Limit: `params.max_merkle_proofs_containers`
pub merkle_proofs: usize,
/// Number of merkle tree state transition proofs (for Insert/Update/Delete).
/// Limit: `params.max_merkle_tree_state_transition_proofs_containers`
pub merkle_state_transitions: usize,
/// Number of custom predicate verifications.
/// Limit: `params.max_custom_predicate_verifications`
pub custom_pred_verifications: usize,
/// Number of SignedBy operations.
/// Limit: `params.max_signed_by`
pub signed_by: usize,
/// Number of PublicKeyOf operations.
/// Limit: `params.max_public_key_of`
pub public_key_of: usize,
/// Custom predicate batches used (for batch cardinality constraint).
/// Limit: `params.max_custom_predicate_batches` distinct batches per POD.
pub custom_batch_ids: BTreeSet<CustomBatchId>,
/// Anchored keys referenced by this operation.
///
/// When a Contains statement with all literal values is used as an argument,
/// the operation references an "anchored key" (dict, key pair). Each unique
/// anchored key used in a POD incurs an additional Contains statement cost,
/// as MainPodBuilder::add_entries_contains will auto-insert it if not already present.
pub anchored_keys: BTreeSet<AnchoredKeyId>,
}
impl StatementCost {
/// Compute the resource cost of an operation.
pub fn from_operation(op: &Operation) -> Self {
let mut cost = Self::default();
match &op.0 {
OperationType::Native(native_op) => {
match native_op {
// Operations that use merkle proofs
NativeOperation::ContainsFromEntries
| NativeOperation::NotContainsFromEntries
| NativeOperation::DictContainsFromEntries
| NativeOperation::DictNotContainsFromEntries
| NativeOperation::SetContainsFromEntries
| NativeOperation::SetNotContainsFromEntries
| NativeOperation::ArrayContainsFromEntries => {
cost.merkle_proofs = 1;
}
// Operations that use merkle state transitions
NativeOperation::ContainerInsertFromEntries
| NativeOperation::ContainerUpdateFromEntries
| NativeOperation::ContainerDeleteFromEntries
| NativeOperation::DictInsertFromEntries
| NativeOperation::DictUpdateFromEntries
| NativeOperation::DictDeleteFromEntries
| NativeOperation::SetInsertFromEntries
| NativeOperation::SetDeleteFromEntries
| NativeOperation::ArrayUpdateFromEntries => {
cost.merkle_state_transitions = 1;
}
// SignedBy operation
NativeOperation::SignedBy => {
cost.signed_by = 1;
}
// PublicKeyOf operation
NativeOperation::PublicKeyOf => {
cost.public_key_of = 1;
}
// Operations with no special resource costs
NativeOperation::None
| NativeOperation::CopyStatement
| NativeOperation::EqualFromEntries
| NativeOperation::NotEqualFromEntries
| NativeOperation::LtEqFromEntries
| NativeOperation::LtFromEntries
| NativeOperation::TransitiveEqualFromStatements
| NativeOperation::LtToNotEqual
| NativeOperation::SumOf
| NativeOperation::ProductOf
| NativeOperation::MaxOf
| NativeOperation::HashOf
// Syntactic sugar variants (lowered before proving)
| NativeOperation::GtEqFromEntries
| NativeOperation::GtFromEntries
| NativeOperation::GtToNotEqual => {}
}
}
OperationType::Custom(cpr) => {
cost.custom_pred_verifications = 1;
cost.custom_batch_ids
.insert(CustomBatchId::from(&*cpr.batch));
}
}
// Extract anchored keys from operation arguments.
// Any argument that is a Contains statement with all literal values
// represents an anchored key reference that will require a Contains
// statement in the POD (auto-inserted by MainPodBuilder if needed).
for arg in &op.1 {
if let OperationArg::Statement(stmt) = arg {
if let Some(anchored_key) = AnchoredKeyId::from_contains_statement(stmt) {
cost.anchored_keys.insert(anchored_key);
}
}
}
cost
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{
frontend::Operation as FrontendOp,
middleware::{NativeOperation, OperationAux, OperationType},
};
fn make_native_op(native_op: NativeOperation) -> FrontendOp {
FrontendOp(OperationType::Native(native_op), vec![], OperationAux::None)
}
#[test]
fn test_cost_from_native_ops() {
// Test merkle proof ops
let contains_op = make_native_op(NativeOperation::ContainsFromEntries);
let cost = StatementCost::from_operation(&contains_op);
assert_eq!(cost.merkle_proofs, 1);
assert_eq!(cost.merkle_state_transitions, 0);
// Test merkle state transition ops
let insert_op = make_native_op(NativeOperation::ContainerInsertFromEntries);
let cost = StatementCost::from_operation(&insert_op);
assert_eq!(cost.merkle_proofs, 0);
assert_eq!(cost.merkle_state_transitions, 1);
// Test signed_by
let signed_op = make_native_op(NativeOperation::SignedBy);
let cost = StatementCost::from_operation(&signed_op);
assert_eq!(cost.signed_by, 1);
// Test public_key_of
let pk_op = make_native_op(NativeOperation::PublicKeyOf);
let cost = StatementCost::from_operation(&pk_op);
assert_eq!(cost.public_key_of, 1);
}
}

View file

@ -0,0 +1,178 @@
//! Dependency analysis for statements and operations.
//!
//! This module analyzes dependencies between statements to determine
//! which statements must be proved before others.
use std::collections::HashMap;
use crate::{
frontend::{Operation, OperationArg},
middleware::{Hash, Statement},
};
/// Represents a source of a statement dependency.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum StatementSource {
/// Statement created within this builder at the given index.
Internal(usize),
/// Statement from an external input POD (identified by POD hash).
External(Hash),
}
/// Dependency graph for all statements in a builder.
///
/// Each element `statement_deps[i]` is the list of dependencies for statement `i`.
#[derive(Clone, Debug)]
pub struct DependencyGraph {
/// Dependencies for each statement (indexed by statement index).
pub statement_deps: Vec<Vec<StatementSource>>,
}
impl DependencyGraph {
/// Build a dependency graph from statements and operations.
///
/// `statements` and `operations` should be parallel arrays where
/// `operations[i]` produces `statements[i]`.
///
/// `external_pod_statements` maps (pod_hash, statement) pairs to enable
/// recognizing references to external POD statements.
pub fn build(
statements: &[Statement],
operations: &[Operation],
external_pod_statements: &HashMap<Statement, Hash>,
) -> Self {
let mut statement_deps = Vec::with_capacity(statements.len());
// Build a map from statement to its index for internal lookup.
// Use entry().or_insert() to preserve the FIRST occurrence of each statement.
// This is important for CopyStatement: if statements[0] = A and statements[2] = copy(A) = A,
// we want statement_to_index[A] = 0 (the original), not 2 (the copy).
let mut statement_to_index: HashMap<&Statement, usize> = HashMap::new();
for (i, s) in statements.iter().enumerate() {
if !s.is_none() {
statement_to_index.entry(s).or_insert(i);
}
}
for (idx, op) in operations.iter().enumerate() {
let mut deps = Vec::new();
// Examine each argument to the operation
for arg in &op.1 {
if let OperationArg::Statement(ref dep_stmt) = arg {
if dep_stmt.is_none() {
continue;
}
// Check if this is an internal statement (created earlier in this builder)
if let Some(&dep_idx) = statement_to_index.get(dep_stmt) {
// Internal dependencies must always be from earlier statements
assert!(
dep_idx <= idx,
"Statement at index {} depends on future statement at index {}",
idx,
dep_idx
);
if dep_idx < idx {
// The statement was created by an earlier operation
deps.push(StatementSource::Internal(dep_idx));
continue;
}
// dep_idx == idx: The first occurrence of this statement is at the current index,
// meaning this operation both takes and produces this statement (e.g., CopyStatement
// copying from an external POD). Fall through to check external PODs for the source.
}
// Check if this is from an external POD
if let Some(&pod_hash) = external_pod_statements.get(dep_stmt) {
deps.push(StatementSource::External(pod_hash));
} else {
// Statement arguments should either be internal (created earlier)
// or from external PODs. If neither, something is wrong.
unreachable!(
"Statement argument not found in internal statements or external PODs: {:?}",
dep_stmt
);
}
}
}
statement_deps.push(deps);
}
Self { statement_deps }
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{
frontend::Operation as FrontendOp,
middleware::{NativeOperation, OperationAux, OperationType, Value, ValueRef},
};
fn equal_stmt(n: i64) -> Statement {
Statement::Equal(
ValueRef::Literal(Value::from(n)),
ValueRef::Literal(Value::from(n)),
)
}
/// None operation produces Statement::None
fn none_op() -> FrontendOp {
FrontendOp(
OperationType::Native(NativeOperation::None),
vec![],
OperationAux::None,
)
}
/// CopyStatement(s) produces s (the same statement)
fn copy_op(stmt: Statement) -> FrontendOp {
FrontendOp(
OperationType::Native(NativeOperation::CopyStatement),
vec![OperationArg::Statement(stmt)],
OperationAux::None,
)
}
#[test]
fn test_copy_creates_dependency_on_original() {
// CopyStatement(s) produces s. When we copy a statement, the copy
// depends on where that statement first appears.
//
// statements[0] = s (produced by none_op - not realistic, but we need a first occurrence)
// statements[1] = s (produced by copy_op(s))
//
// op1's argument s matches statements[0], so statement 1 depends on statement 0.
let s = equal_stmt(1);
let statements = vec![s.clone(), s.clone()];
let operations = vec![
none_op(), // Placeholder - in reality something else would produce s
copy_op(s), // Copies s, producing s. Depends on statements[0].
];
let graph = DependencyGraph::build(&statements, &operations, &HashMap::new());
assert!(graph.statement_deps[0].is_empty());
assert_eq!(graph.statement_deps[1], vec![StatementSource::Internal(0)]);
}
#[test]
fn test_multiple_copies_depend_on_original() {
// Multiple copies of the same statement all depend on where it first appears.
let s = equal_stmt(1);
let statements = vec![s.clone(), s.clone(), s.clone()];
let operations = vec![none_op(), copy_op(s.clone()), copy_op(s)];
let graph = DependencyGraph::build(&statements, &operations, &HashMap::new());
assert!(graph.statement_deps[0].is_empty());
assert_eq!(graph.statement_deps[1], vec![StatementSource::Internal(0)]);
assert_eq!(graph.statement_deps[2], vec![StatementSource::Internal(0)]);
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,703 @@
//! MILP solver for multi-POD packing.
//!
//! This module builds and solves a Mixed Integer Linear Program (MILP) to minimize
//! the number of PODs needed to prove a set of statements while respecting resource
//! limits and dependency constraints.
//!
//! # Constraint Overview
//!
//! The solver uses the following constraints (numbered for reference in code comments):
//!
//! - **Constraint 1 (Coverage)**: Each statement must be proved in at least one POD.
//! - **Constraint 2 (Output POD)**: Output-public statements must be public in the last POD.
//! - **Constraint 2b (Privacy)**: Non-output-public statements cannot be public in the output POD.
//! - **Constraint 3 (Public ⇒ Proved)**: A statement can only be public if it's proved there.
//! - **Constraint 4 (POD Existence)**: If any statement is proved in POD p, then p is used.
//! - **Constraint 5 (Dependencies)**: If statement S depends on D and S is proved in POD p,
//! then D must be available: either proved locally in p, or public in some earlier POD.
//! - **Constraint 5b (Copy Tracking)**: Track when dependencies need CopyStatement.
//! - **Constraint 6 (Resource Limits)**: Per-POD limits on statements, public slots, merkle
//! proofs, custom predicates, batches, etc.
//! - **Constraint 7 (Batch Cardinality)**: Limit distinct custom predicate batches per POD.
//! - **Constraint 7b (Anchored Keys)**: Track auto-inserted Contains for anchored key references.
//! - **Constraint 8a (Internal Inputs)**: Track which earlier PODs are used as inputs.
//! - **Constraint 8b (External Inputs)**: Track which external PODs are used as inputs.
//! - **Constraint 8c (Input Limit)**: Total inputs (internal + external) ≤ max_input_pods.
//! - **Constraint 9 (Symmetry Breaking)**: PODs are used in order (0, 1, 2, ...) with no gaps.
//!
//! # Solution Approach
//!
//! The solver uses an incremental approach: it tries solving with the minimum possible
//! number of PODs first, then increments until a feasible solution is found. This is
//! efficient for the common case where few PODs are needed.
// MILP constraint building uses explicit index loops for clarity
#![allow(clippy::needless_range_loop)]
use std::collections::BTreeSet;
use good_lp::{
constraint, default_solver, variable, Expression, ProblemVariables, Solution, SolverModel,
Variable,
};
use itertools::Itertools;
use super::Result;
use crate::{
frontend::multi_pod::{
cost::{AnchoredKeyId, CustomBatchId, StatementCost},
deps::{DependencyGraph, StatementSource},
},
middleware::Params,
};
/// Threshold for interpreting MILP solver's floating-point results as binary.
/// The solver returns continuous values in [0, 1] for binary variables;
/// values > 0.5 are interpreted as "true" (1), otherwise "false" (0).
const SOLVER_BINARY_THRESHOLD: f64 = 0.5;
/// Solution from the MILP solver.
#[derive(Clone, Debug)]
pub struct MultiPodSolution {
/// Number of PODs needed.
pub pod_count: usize,
/// For each statement index, which POD(s) it is proved in.
/// (A statement may be proved in multiple PODs if re-proving is cheaper than copying.)
pub statement_to_pods: Vec<Vec<usize>>,
/// For each POD, which statement indices are proved in it.
pub pod_statements: Vec<Vec<usize>>,
/// For each POD, which statement indices are public in it.
pub pod_public_statements: Vec<BTreeSet<usize>>,
}
/// Input to the MILP solver.
pub struct SolverInput<'a> {
/// Number of statements.
pub num_statements: usize,
/// Resource costs for each statement.
pub costs: &'a [StatementCost],
/// Dependency graph.
pub deps: &'a DependencyGraph,
/// Indices of statements that must be public in output PODs.
pub output_public_indices: &'a [usize],
/// Parameters defining per-POD limits.
pub params: &'a Params,
/// Maximum number of PODs the solver will consider.
pub max_pods: usize,
/// All unique anchored keys referenced by any statement.
///
/// Each unique (dict, key) pair that is used as an anchored key reference
/// in any operation. When a Contains statement with literal values is used
/// as an argument, it creates an anchored key reference.
pub all_anchored_keys: &'a [AnchoredKeyId],
/// For each anchored key, the statement index that produces it (if any).
///
/// When a Contains statement with literal (dict, key, value) args is explicitly
/// added, it "produces" that anchored key. If the producer is in the same POD
/// as statements using the anchored key, no auto-insertion is needed.
/// `anchored_key_producers[i]` corresponds to `all_anchored_keys[i]`.
pub anchored_key_producers: &'a [Option<usize>],
/// Statement content groups for deduplication.
///
/// Each inner Vec contains statement indices that have identical content.
/// When multiple statements with the same content are proved in the same POD,
/// they only use one statement slot (the POD deduplicates identical statements).
pub statement_content_groups: &'a [Vec<usize>],
}
/// Solve the MILP problem to find optimal POD packing.
///
/// Uses an incremental approach: tries solving with min_pods first,
/// then increments until a solution is found or target_pods is exceeded.
/// This is efficient for the common case where min_pods is sufficient.
pub fn solve(input: &SolverInput) -> Result<MultiPodSolution> {
let n = input.num_statements;
// Require at least one public statement. A POD with no public statements
// can't prove anything to an external verifier.
if input.output_public_indices.is_empty() {
return Err(super::Error::Solver(
"No public statements requested. Use pub_op() to add at least one statement \
that should be visible in the output POD."
.to_string(),
));
}
// Check that all output-public statements can fit in a single POD
let num_output_public = input.output_public_indices.len();
if num_output_public > input.params.max_public_statements {
return Err(super::Error::Solver(format!(
"Too many public statements requested: {} requested, but max_public_statements is {}. \
All public statements must fit in a single output POD.",
num_output_public, input.params.max_public_statements
)));
}
// Lower bound on number of PODs needed
// Note: max_priv_statements is the limit on total unique statements per POD
// (public statements are copies from private slots)
let max_stmts_per_pod = input.params.max_priv_statements();
let min_pods_by_statements = n.div_ceil(max_stmts_per_pod);
let min_pods = min_pods_by_statements.max(1);
// Check if the problem exceeds the configured max_pods limit
if min_pods > input.max_pods {
return Err(super::Error::Solver(format!(
"Problem requires at least {} PODs, but max_pods is set to {}. \
Increase Options::max_pods to allow more PODs.",
min_pods, input.max_pods
)));
}
// Collect all unique custom batch IDs used
let all_batches: Vec<CustomBatchId> = input
.costs
.iter()
.flat_map(|c| c.custom_batch_ids.iter().cloned())
.unique()
.collect();
// Incremental approach: try solving with increasing POD counts
// Start with min_pods and increment until we find a feasible solution
for target_pods in min_pods..=input.max_pods {
if let Some(solution) = try_solve_with_pods(input, target_pods, &all_batches)? {
return Ok(solution);
}
// Infeasible with target_pods, try more
}
// No feasible solution found even with max_pods
Err(super::Error::Solver(format!(
"No feasible solution found with up to {} PODs",
input.max_pods
)))
}
/// Try to solve the packing problem with exactly `target_pods` PODs.
///
/// Builds a MILP model with all constraints and attempts to solve it.
/// Returns `Ok(Some(solution))` if a feasible assignment exists,
/// `Ok(None)` if the problem is infeasible with this many PODs.
///
/// The caller (in `solve()`) handles incrementing `target_pods` when infeasible.
fn try_solve_with_pods(
input: &SolverInput,
target_pods: usize,
all_batches: &[CustomBatchId],
) -> Result<Option<MultiPodSolution>> {
// Create variables
let mut vars = ProblemVariables::new();
let n = input.num_statements;
// prove[s][p] - statement s is proved in POD p
let prove: Vec<Vec<Variable>> = (0..n)
.map(|_| {
(0..target_pods)
.map(|_| vars.add(variable().binary()))
.collect()
})
.collect();
// public[s][p] - statement s is public in POD p
let public: Vec<Vec<Variable>> = (0..n)
.map(|_| {
(0..target_pods)
.map(|_| vars.add(variable().binary()))
.collect()
})
.collect();
// pod_used[p] - POD p is used
let pod_used: Vec<Variable> = (0..target_pods)
.map(|_| vars.add(variable().binary()))
.collect();
// batch_used[b][p] - custom batch b is used in POD p
let batch_used: Vec<Vec<Variable>> = (0..all_batches.len())
.map(|_| {
(0..target_pods)
.map(|_| vars.add(variable().binary()))
.collect()
})
.collect();
// anchored_key_used[ak][p] - anchored key ak is used in POD p
// When a statement references an anchored key (via a Contains statement argument),
// that POD must have a Contains statement for that (dict, key) pair.
// MainPodBuilder::add_entries_contains auto-inserts these, and we must account
// for them in the statement count.
let anchored_key_used: Vec<Vec<Variable>> = (0..input.all_anchored_keys.len())
.map(|_| {
(0..target_pods)
.map(|_| vars.add(variable().binary()))
.collect()
})
.collect();
// uses_input[p][pp] - POD p uses POD pp as an input (pp < p)
// We only create variables for pp < p
let uses_input: Vec<Vec<Variable>> = (0..target_pods)
.map(|p| (0..p).map(|_| vars.add(variable().binary())).collect())
.collect();
// Collect all statement indices that are internal dependencies.
// These are statements that other statements depend on, and may need to be copied
// into PODs where the dependent statement is proved but the dependency is not.
let internal_deps: BTreeSet<usize> = input
.deps
.statement_deps
.iter()
.flat_map(|deps| deps.iter())
.filter_map(|dep| match dep {
StatementSource::Internal(d) => Some(*d),
StatementSource::External(_) => None,
})
.collect();
// needs_copy[d][p] - dependency d needs to be copied into POD p
// This is 1 when: (some statement s in p depends on d) AND (d is not proved in p)
// We only create variables for dependencies that are actually used.
let dep_indices: Vec<usize> = internal_deps.iter().copied().collect();
let needs_copy: Vec<Vec<Variable>> = (0..dep_indices.len())
.map(|_| {
(0..target_pods)
.map(|_| vars.add(variable().binary()))
.collect()
})
.collect();
// Collect all external POD hashes that statements depend on.
// These are user-provided input PODs referenced by statements.
use crate::middleware::Hash;
let external_pods: Vec<Hash> = input
.deps
.statement_deps
.iter()
.flat_map(|deps| deps.iter())
.filter_map(|dep| match dep {
StatementSource::External(h) => Some(*h),
StatementSource::Internal(_) => None,
})
.collect::<BTreeSet<_>>()
.into_iter()
.collect();
// uses_external[p][e] - POD p uses external POD e as an input
let uses_external: Vec<Vec<Variable>> = (0..target_pods)
.map(|_| {
(0..external_pods.len())
.map(|_| vars.add(variable().binary()))
.collect()
})
.collect();
// Map from external POD hash to index in uses_external
let external_to_idx: std::collections::HashMap<Hash, usize> = external_pods
.iter()
.enumerate()
.map(|(i, h)| (*h, i))
.collect();
// content_group_used[g][p] - content group g has at least one statement proved in POD p
// When multiple statements have identical content, they share a slot in the POD.
// This variable tracks whether at least one statement from each content group is proved.
let num_groups = input.statement_content_groups.len();
let content_group_used: Vec<Vec<Variable>> = (0..num_groups)
.map(|_| {
(0..target_pods)
.map(|_| vars.add(variable().binary()))
.collect()
})
.collect();
// Objective: minimize number of PODs used
let objective: Expression = pod_used.iter().sum();
let mut model = vars.minimise(objective).using(default_solver);
// Constraint 1: Each statement must be proved at least once
for s in 0..n {
let sum: Expression = prove[s].iter().sum();
model.add_constraint(constraint!(sum >= 1));
}
// Constraint 2: Output-public statements must be public in the output POD (last POD)
// The output POD is at index target_pods-1, allowing it to access all earlier PODs
// for dependencies. This ensures exactly one output POD with deterministic location.
let output_pod = target_pods - 1;
for &s in input.output_public_indices {
model.add_constraint(constraint!(public[s][output_pod] == 1));
}
// Constraint 2b: Non-output-public statements cannot be public in the output POD
// This prevents private statements from leaking to the output POD's public slots.
for s in 0..n {
if !input.output_public_indices.contains(&s) {
model.add_constraint(constraint!(public[s][output_pod] == 0));
}
}
// Constraint 3: Public implies proved
for s in 0..n {
for p in 0..target_pods {
model.add_constraint(constraint!(public[s][p] <= prove[s][p]));
}
}
// Constraint 4: Pod existence - if any statement is proved in p, p is used
for s in 0..n {
for p in 0..target_pods {
model.add_constraint(constraint!(prove[s][p] <= pod_used[p]));
}
}
// Constraint 5: Dependencies (works with Constraint 8 to enforce input POD tracking)
//
// If s depends on d (internal), and s is proved in p, then either:
// - d is proved in p (local availability), OR
// - d is public in some earlier POD p' < p (cross-POD availability)
//
// This constraint ensures dependencies are AVAILABLE. It does NOT track which
// earlier PODs are actually used as inputs - that's handled by Constraint 8.
// Together:
// - Constraint 5 ensures the dependency CAN be satisfied
// - Constraint 8 ensures that when we use a statement from earlier POD pp,
// we count pp as an input to pod p (for max_input_pods enforcement)
for s in 0..n {
for dep in &input.deps.statement_deps[s] {
if let StatementSource::Internal(d) = dep {
for p in 0..target_pods {
// prove[s][p] <= prove[d][p] + sum_{p' < p} public[d][p']
let mut rhs: Expression = prove[*d][p].into();
for pp in 0..p {
rhs += public[*d][pp];
}
model.add_constraint(constraint!(prove[s][p] <= rhs));
}
}
}
}
// Constraint 5b: needs_copy tracking for cross-POD dependencies
// needs_copy[d][p] = 1 when: some statement s proved in p depends on d, AND d is not proved in p.
// This tracks CopyStatements that will be added during build_single_pod.
for (di, &d) in dep_indices.iter().enumerate() {
for p in 0..target_pods {
// needs_copy[d][p] >= prove[s][p] - prove[d][p] for each s that depends on d
// If s is in p (prove[s][p]=1) and d is not in p (prove[d][p]=0), then needs_copy >= 1
for s in 0..n {
let depends_on_d = input.deps.statement_deps[s]
.iter()
.any(|dep| matches!(dep, StatementSource::Internal(dep_d) if *dep_d == d));
if depends_on_d {
model.add_constraint(constraint!(
needs_copy[di][p] >= prove[s][p] - prove[d][p]
));
}
}
// needs_copy[d][p] <= 1 - prove[d][p]
// If d is proved locally (prove[d][p]=1), no copy needed (needs_copy <= 0)
model.add_constraint(constraint!(needs_copy[di][p] <= 1 - prove[d][p]));
}
}
// Constraint 6: Resource limits per POD
//
// 6a-pre: Content group tracking for statement deduplication
// When multiple statement indices have identical content, they share a single slot in the POD.
// content_group_used[g][p] = 1 iff at least one statement from group g is proved in POD p.
for (g, group) in input.statement_content_groups.iter().enumerate() {
for p in 0..target_pods {
// Lower bound: if any statement in the group is proved, the group is used
for &s in group {
model.add_constraint(constraint!(content_group_used[g][p] >= prove[s][p]));
}
// Upper bound: if no statements in the group are proved, the group is not used
let group_prove_sum: Expression = group.iter().map(|&s| prove[s][p]).sum();
model.add_constraint(constraint!(content_group_used[g][p] <= group_prove_sum));
}
}
for p in 0..target_pods {
// 6a: Unique statement count (unique content groups + CopyStatements + anchored key Contains)
// Statements with identical content share a slot, so we count content groups, not indices.
// CopyStatements and anchored key Contains also use statement slots.
// The total must not exceed max_priv_statements (= max_statements - max_public_statements).
let unique_stmt_sum: Expression = (0..num_groups).map(|g| content_group_used[g][p]).sum();
let copy_sum: Expression = (0..dep_indices.len()).map(|di| needs_copy[di][p]).sum();
let anchored_key_sum: Expression = (0..input.all_anchored_keys.len())
.map(|ak| anchored_key_used[ak][p])
.sum();
model.add_constraint(constraint!(
unique_stmt_sum + copy_sum + anchored_key_sum
<= (input.params.max_priv_statements() as f64) * pod_used[p]
));
// 6b: Public statement count
let pub_sum: Expression = (0..n).map(|s| public[s][p]).sum();
model.add_constraint(constraint!(
pub_sum <= (input.params.max_public_statements as f64) * pod_used[p]
));
// 6c: Merkle proofs
let merkle_sum: Expression = (0..n)
.map(|s| (input.costs[s].merkle_proofs as f64) * prove[s][p])
.sum();
model.add_constraint(constraint!(
merkle_sum <= (input.params.max_merkle_proofs_containers as f64) * pod_used[p]
));
// 6d: Merkle state transitions
let mst_sum: Expression = (0..n)
.map(|s| (input.costs[s].merkle_state_transitions as f64) * prove[s][p])
.sum();
model.add_constraint(constraint!(
mst_sum
<= (input
.params
.max_merkle_tree_state_transition_proofs_containers as f64)
* pod_used[p]
));
// 6e: Custom predicate verifications
let cpv_sum: Expression = (0..n)
.map(|s| (input.costs[s].custom_pred_verifications as f64) * prove[s][p])
.sum();
model.add_constraint(constraint!(
cpv_sum <= (input.params.max_custom_predicate_verifications as f64) * pod_used[p]
));
// 6f: SignedBy
let sb_sum: Expression = (0..n)
.map(|s| (input.costs[s].signed_by as f64) * prove[s][p])
.sum();
model.add_constraint(constraint!(
sb_sum <= (input.params.max_signed_by as f64) * pod_used[p]
));
// 6g: PublicKeyOf
let pko_sum: Expression = (0..n)
.map(|s| (input.costs[s].public_key_of as f64) * prove[s][p])
.sum();
model.add_constraint(constraint!(
pko_sum <= (input.params.max_public_key_of as f64) * pod_used[p]
));
}
// Constraint 7: Batch cardinality
// batch_used[b][p] >= prove[s][p] for all s that use batch b (batch is used if any statement uses it)
// batch_used[b][p] <= sum of prove[s][p] for all s using batch b (batch is 0 if no statements use it)
for (b, batch_id) in all_batches.iter().enumerate() {
for p in 0..target_pods {
let mut sum: Expression = 0.into();
for s in 0..n {
if input.costs[s].custom_batch_ids.contains(batch_id) {
model.add_constraint(constraint!(batch_used[b][p] >= prove[s][p]));
sum += prove[s][p];
}
}
model.add_constraint(constraint!(batch_used[b][p] <= sum));
}
}
// Batch count per POD
for p in 0..target_pods {
let batch_sum: Expression = (0..all_batches.len()).map(|b| batch_used[b][p]).sum();
model.add_constraint(constraint!(
batch_sum <= (input.params.max_custom_predicate_batches as f64) * pod_used[p]
));
}
// Constraint 7b: Anchored key tracking
//
// anchored_key_used[ak][p] = 1 when auto-insertion of a Contains is needed for anchored key ak in POD p.
// This happens when: some statement using ak is in POD p, AND the producing Contains is NOT in POD p.
//
// If a Contains statement explicitly produces ak (anchored_key_producers[ak] = Some(prod_idx)):
// - Lower: anchored_key_used[ak][p] >= prove[s][p] - prove[prod_idx][p] for all s using ak
// - Upper: anchored_key_used[ak][p] <= 1 - prove[prod_idx][p]
// This ensures overhead is 0 when the producer is in the same POD.
//
// If no Contains produces ak (anchored_key_producers[ak] = None):
// - Lower: anchored_key_used[ak][p] >= prove[s][p] for all s using ak
// - Upper: anchored_key_used[ak][p] <= sum of prove[s][p] for all s using ak
// Auto-insertion is always needed when any user is present.
for (ak_idx, ak) in input.all_anchored_keys.iter().enumerate() {
let producer = input.anchored_key_producers[ak_idx];
for p in 0..target_pods {
let mut user_sum: Expression = 0.into();
for s in 0..n {
if input.costs[s].anchored_keys.contains(ak) {
if let Some(prod_idx) = producer {
// Producer exists: only count overhead if producer not in this POD
model.add_constraint(constraint!(
anchored_key_used[ak_idx][p] >= prove[s][p] - prove[prod_idx][p]
));
} else {
// No producer: always need auto-insertion if user is present
model.add_constraint(constraint!(
anchored_key_used[ak_idx][p] >= prove[s][p]
));
}
user_sum += prove[s][p];
}
}
if let Some(prod_idx) = producer {
// If producer is in POD, no auto-insertion needed (overhead = 0)
model.add_constraint(constraint!(
anchored_key_used[ak_idx][p] <= 1 - prove[prod_idx][p]
));
} else {
// No producer: overhead is bounded by whether any user is present
model.add_constraint(constraint!(anchored_key_used[ak_idx][p] <= user_sum));
}
}
}
// Constraint 8a: Internal input POD tracking using uses_input
// uses_input[p][pp] >= prove[s][p] + public[d][pp] - prove[d][p] - 1
// for each dependency (s depends on d)
//
// If s is proved in p and d is public in pp, we need pp as input UNLESS d is also
// proved locally in p. Subtracting prove[d][p] ensures that when d is re-proved
// locally (prove[d][p] = 1), the constraint becomes uses_input >= 0, which is
// always satisfied without forcing the input relationship.
for s in 0..n {
for dep in &input.deps.statement_deps[s] {
if let StatementSource::Internal(d) = dep {
for p in 1..target_pods {
for pp in 0..p {
model.add_constraint(constraint!(
uses_input[p][pp] >= prove[s][p] + public[*d][pp] - prove[*d][p] - 1.0
));
}
}
}
}
}
// Constraint 8b: External input POD tracking using uses_external
// If statement s is proved in POD p and s depends on external POD e, then uses_external[p][e] = 1
for s in 0..n {
for dep in &input.deps.statement_deps[s] {
if let StatementSource::External(h) = dep {
if let Some(&e) = external_to_idx.get(h) {
for p in 0..target_pods {
// If s is proved in p, then uses_external[p][e] = 1
model.add_constraint(constraint!(uses_external[p][e] >= prove[s][p]));
}
}
}
}
}
// Constraint 8c: Total input PODs (internal + external) must not exceed max_input_pods
// For each POD p, the total number of inputs is:
// - Internal inputs: PODs pp < p that provide public statements used by p
// - External inputs: User-provided PODs referenced by statements in p
for p in 0..target_pods {
let internal_sum: Expression = if p > 0 {
(0..p).map(|pp| uses_input[p][pp]).sum()
} else {
0.into()
};
let external_sum: Expression = (0..external_pods.len()).map(|e| uses_external[p][e]).sum();
model.add_constraint(constraint!(
internal_sum + external_sum <= (input.params.max_input_pods as f64) * pod_used[p]
));
}
// Constraint 9: Symmetry breaking - use PODs in order
// pod_used[p] >= pod_used[p+1]
for p in 0..target_pods - 1 {
model.add_constraint(constraint!(pod_used[p] >= pod_used[p + 1]));
}
// Solve
let solution = match model.solve() {
Ok(sol) => sol,
Err(_) => {
// Infeasible with this number of PODs, try more
return Ok(None);
}
};
// Extract solution: count how many PODs are used.
// Symmetry breaking (Constraint 9) ensures PODs are used in order with no gaps.
let mut pod_count = 0;
for p in 0..target_pods {
if solution.value(pod_used[p]) > SOLVER_BINARY_THRESHOLD {
pod_count += 1;
}
}
let mut statement_to_pods: Vec<Vec<usize>> = vec![vec![]; n];
let mut pod_statements: Vec<Vec<usize>> = vec![vec![]; pod_count];
let mut pod_public_statements: Vec<BTreeSet<usize>> = vec![BTreeSet::new(); pod_count];
for s in 0..n {
for p in 0..pod_count {
if solution.value(prove[s][p]) > SOLVER_BINARY_THRESHOLD {
statement_to_pods[s].push(p);
pod_statements[p].push(s);
}
if solution.value(public[s][p]) > SOLVER_BINARY_THRESHOLD {
pod_public_statements[p].insert(s);
}
}
}
Ok(Some(MultiPodSolution {
pod_count,
statement_to_pods,
pod_statements,
pod_public_statements,
}))
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_no_public_statements_error() {
// At least one public statement is required - otherwise the POD can't
// prove anything to an external verifier.
let params = Params::default();
let deps = DependencyGraph {
statement_deps: vec![],
};
let input = SolverInput {
num_statements: 0,
costs: &[],
deps: &deps,
output_public_indices: &[],
params: &params,
max_pods: 20,
all_anchored_keys: &[],
anchored_key_producers: &[],
statement_content_groups: &[],
};
let result = solve(&input);
assert!(result.is_err());
assert!(result
.unwrap_err()
.to_string()
.contains("No public statements requested"));
}
}