Re-implement merkletree with persistent storage (key-value db) (#487)

* refactor merkletree to work with disk keyvalue database (wip)

* various fixes post reimplementation; pending delete leaf

* add delete operation case for the new in db tree approach

* polish tree update & delete; everything works (pending polishing)

* polish panics into errs, prints, etc

* Implement iterator

* Lint

* fix case no-siblings

* case delete with semi-empty branch

* polishing

* start adding rocksdb & heed backends for the DB & Txn traits

* Satisfy the borrow checker

* abstract merkletree tests to use the various available DBs

* update store_node interface (rm hash input), rm heed.rs

* polishing

* typos

* Ditch transactions

* add feature for rocksdb, return errs at new_with_db, remove empty leaf case in Leaf::new

* intermediate instead of leaf in empty node when deleting leaf

---------

Co-authored-by: Ahmad <root@ahmadafuni.com>
This commit is contained in:
arnaucube 2026-03-11 16:32:42 +01:00 committed by GitHub
parent a79f82eb9d
commit 32f45872d7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 974 additions and 431 deletions

View file

@ -48,6 +48,7 @@ good_lp = { version = "1.8", default-features = false, features = [
"scip_bundled", "scip_bundled",
] } ] }
annotate-snippets = "0.11" annotate-snippets = "0.11"
rocksdb = { version = "0.24.0", optional = true } # keyvalue database for merkletree
# Uncomment for debugging with https://github.com/ed255/plonky2/ at branch `feat/debug`. The repo directory needs to be checked out next to the pod2 repo directory. # Uncomment for debugging with https://github.com/ed255/plonky2/ at branch `feat/debug`. The repo directory needs to be checked out next to the pod2 repo directory.
# [patch."https://github.com/0xPARC/plonky2"] # [patch."https://github.com/0xPARC/plonky2"]
@ -57,12 +58,13 @@ annotate-snippets = "0.11"
pretty_assertions = "1.4.1" pretty_assertions = "1.4.1"
# Used only for testing JSON Schema generation and validation. # Used only for testing JSON Schema generation and validation.
jsonschema = "0.30.0" jsonschema = "0.30.0"
tempfile = "3"
[build-dependencies] [build-dependencies]
vergen-gitcl = { version = "1.0.0", features = ["build"] } vergen-gitcl = { version = "1.0.0", features = ["build"] }
[features] [features]
default = ["backend_plonky2", "zk", "mem_cache"] default = ["backend_plonky2", "zk", "mem_cache", "db_rocksdb"]
backend_plonky2 = ["plonky2"] backend_plonky2 = ["plonky2"]
zk = [] zk = []
metrics = [] metrics = []
@ -70,6 +72,7 @@ time = []
examples = [] examples = []
disk_cache = ["directories", "minicbor-serde"] disk_cache = ["directories", "minicbor-serde"]
mem_cache = [] mem_cache = []
db_rocksdb = ["rocksdb"]
# Uncomment in order to enable debug information in the release builds. This allows getting panic backtraces with a performance similar to regular release. # Uncomment in order to enable debug information in the release builds. This allows getting panic backtraces with a performance similar to regular release.
# [profile.release] # [profile.release]

View file

@ -2276,9 +2276,9 @@ mod tests {
] ]
.into_iter() .into_iter()
.for_each(|(op, st)| { .for_each(|(op, st)| {
let check = std::panic::catch_unwind(|| { let check = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
operation_verify(st, op, prev_statements.to_vec(), Aux::default()) operation_verify(st, op, prev_statements.to_vec(), Aux::default())
}); }));
match check { match check {
Err(e) => { Err(e) => {
let err_string = e.downcast_ref::<String>().unwrap(); let err_string = e.downcast_ref::<String>().unwrap();
@ -2689,9 +2689,9 @@ mod tests {
); );
let prev_statements = [Statement::None.into()]; let prev_statements = [Statement::None.into()];
let check = std::panic::catch_unwind(|| { let check = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
operation_verify(st, op, prev_statements.to_vec(), Aux::default()) operation_verify(st, op, prev_statements.to_vec(), Aux::default())
}); }));
match check { match check {
Err(e) => { Err(e) => {
let err_string = e.downcast_ref::<String>().unwrap(); let err_string = e.downcast_ref::<String>().unwrap();

View file

@ -32,7 +32,7 @@ use crate::{
circuits::common::{CircuitBuilderPod, ValueTarget}, circuits::common::{CircuitBuilderPod, ValueTarget},
error::{Error, Result}, error::{Error, Result},
primitives::merkletree::{ primitives::merkletree::{
MerkleClaimAndProof, MerkleTreeOp, MerkleTreeStateTransitionProof, TreeError, MerkleClaimAndProof, MerkleTreeOp, MerkleTreeStateTransitionProof, TreeError, MAX_DEPTH,
}, },
}, },
measure_gates_begin, measure_gates_end, measure_gates_begin, measure_gates_end,
@ -703,10 +703,13 @@ impl MerkleTreeStateTransitionProofTarget {
{ {
pw.set_hash_target(self.siblings[i], HashOut::from_vec(sibling.0.to_vec()))?; pw.set_hash_target(self.siblings[i], HashOut::from_vec(sibling.0.to_vec()))?;
} }
pw.set_target( let div_lvl = if new_siblings.is_empty() {
self.divergence_level, // don't subtract since it would underflow, use MAX_DEPTH
F::from_canonical_u64((new_siblings.len() - 1) as u64), MAX_DEPTH as u64
)?; } else {
(new_siblings.len() - 1) as u64
};
pw.set_target(self.divergence_level, F::from_canonical_u64(div_lvl))?;
Ok(()) Ok(())
} }

View file

@ -0,0 +1,109 @@
//! Module that implements the key-value DB used at the MerkleTree module.
use std::{
collections::HashMap,
fmt::Debug,
sync::{Arc, Mutex},
};
use anyhow::{anyhow, bail, Result};
use dyn_clone::DynClone;
use crate::{
backends::plonky2::primitives::merkletree::{Leaf, Node},
middleware::{RawValue, EMPTY_VALUE},
};
#[cfg(feature = "db_rocksdb")]
pub mod rocks;
/// Key-value storage backend for merkle tree nodes.
///
/// Implementations must be `Debug`, thread-safe (`Sync + Send`) and clonable through
/// `DynClone` so that a backend can be held and cloned as a trait object.
pub trait DB: Debug + DynClone + Sync + Send {
/// Returns the node stored under `hash`.
///
/// # Errors
/// Returns an error when the backend fails or when no node is found for `hash`.
/// The implementations in this module answer a lookup of `EMPTY_VALUE` with an empty
/// leaf instead of an error.
fn load_node(&self, hash: RawValue) -> Result<Node>;
/// Persists `node`.
///
/// No key is passed in: the implementations in this module store the node under the
/// value of `node.hash()`.
///
/// # Errors
/// Returns an error when the backend fails to write.
fn store_node(&mut self, node: Node) -> Result<()>;
}
// Generates `Clone` for boxed `dyn DB` trait objects (see the `dyn_clone` crate).
dyn_clone::clone_trait_object!(DB);
/// MemDB implements the DB trait in an in-memory HashMap.
///
/// The map lives behind an `Arc<Mutex<..>>`, so the derived `Clone` produces another handle
/// to the same underlying map rather than a copy of its contents.
#[derive(Clone, Debug, Default)]
pub(crate) struct MemDB {
// Nodes keyed by the `RawValue` form of their hash.
inner: Arc<Mutex<HashMap<RawValue, Node>>>,
}
impl MemDB {
pub fn new() -> Self {
Self::default()
}
}
impl DB for MemDB {
fn load_node(&self, hash: RawValue) -> Result<Node> {
let db = self
.inner
.lock()
.map_err(|e| anyhow!("failed to acquire memdb lock for read: {}", e))?;
if let Some(node) = db.get(&hash) {
return Ok(node.clone());
}
if hash == EMPTY_VALUE {
return Ok(Node::Leaf(Leaf::new(hash, EMPTY_VALUE)));
}
bail!("MemDB error: node not found: {}", hash);
}
fn store_node(&mut self, node: Node) -> Result<()> {
let mut db = self
.inner
.lock()
.map_err(|e| anyhow!("failed to acquire memdb lock for write: {}", e))?;
db.insert(node.hash().into(), node);
Ok(())
}
}
// NOTE: this can be replaced by `.to_bytes` & `from_bytes` optimized methods at `Node`
/// Serializes `node` to JSON bytes for storage in a byte-oriented backend.
///
/// Within this module it is called from the feature-gated `rocks` backend, which is
/// presumably why `dead_code` is allowed here.
#[allow(dead_code)]
fn encode_node(node: &Node) -> Result<Vec<u8>> {
    match serde_json::to_vec(node) {
        Ok(bytes) => Ok(bytes),
        Err(e) => Err(anyhow!("failed to serialize node: {e}")),
    }
}
/// Parses a node back from JSON bytes.
///
/// Within this module it is called from the feature-gated `rocks` backend, which is
/// presumably why `dead_code` is allowed here.
#[allow(dead_code)]
fn decode_node(bytes: &[u8]) -> Result<Node> {
    match serde_json::from_slice(bytes) {
        Ok(node) => Ok(node),
        Err(e) => Err(anyhow!("failed to deserialize node: {e}")),
    }
}
#[cfg(test)]
pub mod tests {
    use super::*;

    /// Runs the shared store/load round-trip against every DB backend that is compiled in.
    #[test]
    fn test_db() -> Result<()> {
        let mut db = MemDB::new();
        test_db_opt(&mut db)?;

        #[cfg(feature = "db_rocksdb")]
        {
            // Use a per-process directory under the OS temp dir instead of a fixed
            // `/tmp/rocksdb`: a fixed path is not portable, collides when two test
            // processes run at once, and leaves state behind between runs.
            let path =
                std::env::temp_dir().join(format!("pod2-rocksdb-test-{}", std::process::id()));
            // The database handle is dropped at the end of the closure, before cleanup.
            let outcome = rocks::RocksDB::open(&path).and_then(|mut db| test_db_opt(&mut db));
            // Best-effort cleanup; a failure to remove the directory must not mask the
            // actual test outcome.
            let _ = std::fs::remove_dir_all(&path);
            outcome?;
        }
        Ok(())
    }

    /// Stores a leaf in `db`, loads it back by hash and checks that a leaf with the same
    /// hash is returned.
    fn test_db_opt(db: &mut dyn DB) -> Result<()> {
        let node = Leaf::new(1.into(), 1.into());
        db.store_node(Node::Leaf(node.clone()))?;

        let obtained_node = db.load_node(node.hash.into())?;
        let leaf = match obtained_node {
            Node::Leaf(l) => l,
            _ => panic!("expected a leaf"),
        };
        assert_eq!(leaf.hash, node.hash);
        Ok(())
    }
}

View file

@ -0,0 +1,58 @@
use std::{fmt, path::Path, sync::Arc};
use anyhow::{anyhow, Result};
use rocksdb::{Options, TransactionDB, TransactionDBOptions};
use super::DB;
use crate::{
backends::plonky2::primitives::merkletree::{Leaf, Node},
middleware::{RawValue, EMPTY_VALUE},
};
/// `DB` implementation backed by a RocksDB `TransactionDB`.
///
/// The database handle is wrapped in an `Arc`, so the derived `Clone` yields another handle
/// to the same open database.
#[derive(Clone)]
pub struct RocksDB(Arc<TransactionDB>);
#[allow(dead_code)]
impl RocksDB {
    /// Opens the database located at `path`, creating it when it does not exist yet.
    ///
    /// # Errors
    /// Returns the RocksDB error, rendered as a message, when the database cannot be opened.
    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
        let mut db_opts = Options::default();
        db_opts.create_if_missing(true);

        match TransactionDB::open(&db_opts, &TransactionDBOptions::default(), path) {
            Ok(handle) => Ok(Self(Arc::new(handle))),
            Err(e) => Err(anyhow!("{e}")),
        }
    }
}
impl fmt::Debug for RocksDB {
    /// Writes the fixed label `RocksDB`; the underlying database handle is not printed.
    ///
    /// No trailing newline is emitted, so the output composes correctly when this value is
    /// formatted as part of a larger `{:?}` expression.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("RocksDB")
    }
}
impl DB for RocksDB {
    /// Loads the node stored under `hash`.
    ///
    /// `EMPTY_VALUE` is answered with an empty leaf without touching the database. For any
    /// other hash the stored bytes are fetched and decoded with `decode_node`.
    ///
    /// # Errors
    /// Returns an error when the RocksDB read fails, when the stored bytes cannot be
    /// decoded, or when no entry exists for `hash` (the message names the missing hash).
    fn load_node(&self, hash: RawValue) -> Result<Node> {
        if hash == EMPTY_VALUE {
            return Ok(Node::Leaf(Leaf::new(hash, EMPTY_VALUE)));
        }

        let maybe_node_bytes = self
            .0
            .get(hash.to_bytes())
            .map_err(|e| anyhow!("rocksdb transaction get failed: {e}"))?;

        match maybe_node_bytes {
            Some(bytes) => super::decode_node(&bytes),
            // Name the missing hash, as the in-memory backend does, so the failure can be
            // traced back to a specific node.
            None => Err(anyhow!("rocksdb: node not found: {}", hash)),
        }
    }

    /// Stores `node` under the byte form of its own hash, encoded with `encode_node`.
    ///
    /// # Errors
    /// Returns an error when the node cannot be encoded or when the RocksDB write fails.
    fn store_node(&mut self, node: Node) -> Result<()> {
        let key = RawValue::from(node.hash()).to_bytes();
        let value = super::encode_node(&node)?;
        self.0
            .put(key, value)
            .map_err(|e| anyhow!("rocksdb transaction put failed: {e}"))
    }
}

File diff suppressed because it is too large Load diff