From bf6d8aee8b64c4384ea5f3ab59f39f8ea20c6219 Mon Sep 17 00:00:00 2001 From: Rob Knight Date: Tue, 22 Apr 2025 04:19:20 -0700 Subject: [PATCH] Re-implement serialization (#201) * Serialization tests now pass again * Tidy up and test more edge-cases * Use attributes rather than custom serializer for arrays * Add JSON Schema support * Tests for JSON Schema generation and validation * Add comments * Support custom predicates * Clippy fixes * Make deserialization/constructor functions pub(crate) --- Cargo.toml | 7 +- src/backends/plonky2/mainpod/operation.rs | 8 +- src/backends/plonky2/mainpod/statement.rs | 4 +- src/backends/plonky2/mock/mainpod.rs | 30 +- src/backends/plonky2/mock/signedpod.rs | 14 +- src/backends/plonky2/primitives/merkletree.rs | 6 +- src/frontend/custom.rs | 1 - src/frontend/mod.rs | 14 +- src/frontend/operation.rs | 1 - src/frontend/serialization.rs | 294 +++++++++--------- src/middleware/basetypes.rs | 37 +-- src/middleware/containers.rs | 83 ++++- src/middleware/custom.rs | 31 +- src/middleware/mod.rs | 193 +++++++++++- src/middleware/operation.rs | 6 +- src/middleware/serialization.rs | 67 +++- src/middleware/statement.rs | 13 +- 17 files changed, 554 insertions(+), 255 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ed4c5d4..7f08a55 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,12 +22,17 @@ plonky2 = { git = "https://github.com/0xPolygonZero/plonky2", optional = true } serde = "1.0.219" serde_json = "1.0.140" base64 = "0.22.1" -schemars = "1.0.0-alpha.17" +schemars = "0.8.22" # Uncomment for debugging with https://github.com/ed255/plonky2/ at branch `feat/debug`. The repo directory needs to be checked out next to the pod2 repo directory. # [patch."https://github.com/0xPolygonZero/plonky2"] # plonky2 = { path = "../plonky2/plonky2" } +[dev-dependencies] +pretty_assertions = "1.4.1" +# Used only for testing JSON Schema generation and validation. +jsonschema = "0.30.0" + [features] default = ["backend_plonky2"] backend_plonky2 = ["plonky2"] diff --git a/src/backends/plonky2/mainpod/operation.rs b/src/backends/plonky2/mainpod/operation.rs index 4ec408e..80a6825 100644 --- a/src/backends/plonky2/mainpod/operation.rs +++ b/src/backends/plonky2/mainpod/operation.rs @@ -2,14 +2,14 @@ use std::fmt; use anyhow::{anyhow, Result}; use plonky2::field::types::Field; +use serde::{Deserialize, Serialize}; -// use serde::{Deserialize, Serialize}; use crate::{ backends::plonky2::{mainpod::Statement, primitives::merkletree::MerkleClaimAndProof}, middleware::{self, OperationType, Params, ToFields, F}, }; -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub enum OperationArg { None, Index(usize), @@ -31,7 +31,7 @@ impl OperationArg { } } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub enum OperationAux { None, MerkleProofIndex(usize), @@ -47,7 +47,7 @@ impl ToFields for OperationAux { } } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct Operation(pub OperationType, pub Vec, pub OperationAux); impl Operation { diff --git a/src/backends/plonky2/mainpod/statement.rs b/src/backends/plonky2/mainpod/statement.rs index 225ce2b..6b41637 100644 --- a/src/backends/plonky2/mainpod/statement.rs +++ b/src/backends/plonky2/mainpod/statement.rs @@ -1,13 +1,13 @@ use std::fmt; use anyhow::{anyhow, Result}; +use serde::{Deserialize, Serialize}; -// use serde::{Deserialize, Serialize}; use crate::middleware::{ self, NativePredicate, Params, Predicate, StatementArg, ToFields, WildcardValue, }; -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct Statement(pub Predicate, pub Vec); impl Statement { diff --git a/src/backends/plonky2/mock/mainpod.rs b/src/backends/plonky2/mock/mainpod.rs index 7d8343d..f9abf66 100644 --- a/src/backends/plonky2/mock/mainpod.rs +++ b/src/backends/plonky2/mock/mainpod.rs @@ -5,15 +5,15 @@ use std::{any::Any, fmt}; use anyhow::{anyhow, Result}; +use base64::{prelude::BASE64_STANDARD, Engine}; +use serde::{Deserialize, Serialize}; -// use base64::prelude::*; -// use serde::{Deserialize, Serialize}; -use crate::backends::plonky2::mainpod::process_private_statements_operations; use crate::{ backends::plonky2::{ mainpod::{ extract_merkle_proofs, hash_statements, layout_statements, normalize_statement, - process_public_statements_operations, Operation, Statement, + process_private_statements_operations, process_public_statements_operations, Operation, + Statement, }, primitives::merkletree::MerkleClaimAndProof, }, @@ -31,7 +31,7 @@ impl PodProver for MockProver { } } -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize, Deserialize)] pub struct MockMainPod { params: Params, id: PodId, @@ -166,14 +166,17 @@ impl MockMainPod { }) } - // pub fn deserialize(serialized: String) -> Result { - // let proof = String::from_utf8(BASE64_STANDARD.decode(&serialized)?) - // .map_err(|e| anyhow::anyhow!("Invalid base64 encoding: {}", e))?; - // let pod: MockMainPod = serde_json::from_str(&proof) - // .map_err(|e| anyhow::anyhow!("Failed to parse proof: {}", e))?; + // MockMainPods include some internal private state which is necessary + // for verification. In non-mock Pods, this state will not be necessary, + // as the public statements can be verified using a ZK proof. + pub(crate) fn deserialize(serialized: String) -> Result { + let proof = String::from_utf8(BASE64_STANDARD.decode(&serialized)?) + .map_err(|e| anyhow::anyhow!("Invalid base64 encoding: {}", e))?; + let pod: MockMainPod = serde_json::from_str(&proof) + .map_err(|e| anyhow::anyhow!("Failed to parse proof: {}", e))?; - // Ok(pod) - // } + Ok(pod) + } } impl Pod for MockMainPod { @@ -282,8 +285,7 @@ impl Pod for MockMainPod { } fn serialized_proof(&self) -> String { - todo!() - // BASE64_STANDARD.encode(serde_json::to_string(self).unwrap()) + BASE64_STANDARD.encode(serde_json::to_string(self).unwrap()) } } diff --git a/src/backends/plonky2/mock/signedpod.rs b/src/backends/plonky2/mock/signedpod.rs index a48979d..d79ba6b 100644 --- a/src/backends/plonky2/mock/signedpod.rs +++ b/src/backends/plonky2/mock/signedpod.rs @@ -43,15 +43,11 @@ pub struct MockSignedPod { kvs: HashMap, } -// impl MockSignedPod { -// pub fn deserialize(id: PodId, signature: String, dict: Dictionary) -> Self { -// Self { -// id, -// signature, -// dict, -// } -// } -// } +impl MockSignedPod { + pub(crate) fn new(id: PodId, signature: String, kvs: HashMap) -> Self { + Self { id, signature, kvs } + } +} impl Pod for MockSignedPod { fn verify(&self) -> Result<()> { diff --git a/src/backends/plonky2/primitives/merkletree.rs b/src/backends/plonky2/primitives/merkletree.rs index af0e5f5..ce5bf98 100644 --- a/src/backends/plonky2/primitives/merkletree.rs +++ b/src/backends/plonky2/primitives/merkletree.rs @@ -4,8 +4,8 @@ use std::{collections::HashMap, fmt, iter::IntoIterator}; use anyhow::{anyhow, Result}; use plonky2::field::types::Field; +use serde::{Deserialize, Serialize}; -// use serde::{Deserialize, Serialize}; pub use super::merkletree_circuit::*; use crate::middleware::{hash_fields, Hash, RawValue, EMPTY_HASH, EMPTY_VALUE, F}; @@ -208,7 +208,7 @@ impl fmt::Display for MerkleTree { } } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct MerkleProof { // note: currently we don't use the `_existence` field, we would use if we merge the methods // `verify` and `verify_nonexistence` into a single one @@ -260,7 +260,7 @@ impl MerkleProof { } } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct MerkleClaimAndProof { pub root: Hash, pub key: RawValue, diff --git a/src/frontend/custom.rs b/src/frontend/custom.rs index ac2898d..74ef68e 100644 --- a/src/frontend/custom.rs +++ b/src/frontend/custom.rs @@ -4,7 +4,6 @@ use std::{collections::HashMap, fmt, hash as h, iter, iter::zip, sync::Arc}; use anyhow::{anyhow, Result}; use schemars::JsonSchema; -// use serde::{Deserialize, Serialize}; use crate::{ frontend::{AnchoredKey, Statement, StatementArg}, middleware::{ diff --git a/src/frontend/mod.rs b/src/frontend/mod.rs index ff3692d..e73e49f 100644 --- a/src/frontend/mod.rs +++ b/src/frontend/mod.rs @@ -5,10 +5,8 @@ use std::{collections::HashMap, convert::From, fmt}; use anyhow::{anyhow, Result}; use itertools::Itertools; +use serde::{Deserialize, Serialize}; -// use schemars::JsonSchema; - -// use serde::{Deserialize, Serialize}; use crate::middleware::{ self, check_st_tmpl, hash_str, AnchoredKey, Key, MainPodInputs, NativeOperation, NativePredicate, OperationAux, OperationType, Params, PodId, PodProver, PodSigner, Predicate, @@ -17,8 +15,10 @@ use crate::middleware::{ mod custom; mod operation; +mod serialization; pub use custom::*; pub use operation::*; +use serialization::*; /// This type is just for presentation purposes. #[derive(Clone, Debug, Default, PartialEq, Eq)] @@ -66,8 +66,8 @@ impl SignedPodBuilder { /// SignedPod is a wrapper on top of backend::SignedPod, which additionally stores the /// string<-->hash relation of the keys. -#[derive(Debug, Clone)] -// #[serde(try_from = "SignedPodHelper", into = "SignedPodHelper")] +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(try_from = "SignedPodHelper", into = "SignedPodHelper")] pub struct SignedPod { pub pod: Box, // We store a copy of the key values for quick access @@ -591,8 +591,8 @@ impl MainPodBuilder { } } -#[derive(Debug, Clone)] -// #[serde(try_from = "MainPodHelper", into = "MainPodHelper")] +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(try_from = "MainPodHelper", into = "MainPodHelper")] pub struct MainPod { pub pod: Box, pub public_statements: Vec, diff --git a/src/frontend/operation.rs b/src/frontend/operation.rs index f4284f0..8e8bb99 100644 --- a/src/frontend/operation.rs +++ b/src/frontend/operation.rs @@ -1,6 +1,5 @@ use std::fmt; -// use serde::{Deserialize, Serialize}; use crate::{ frontend::SignedPod, middleware::{AnchoredKey, OperationAux, OperationType, Statement, Value}, diff --git a/src/frontend/serialization.rs b/src/frontend/serialization.rs index 7add3a8..116830a 100644 --- a/src/frontend/serialization.rs +++ b/src/frontend/serialization.rs @@ -1,19 +1,19 @@ -/* -use std::collections::{BTreeMap, HashMap}; +use std::collections::HashMap; -use schemars::{JsonSchema, Schema}; -use serde::{Deserialize, Serialize, Serializer}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; use crate::{ backends::plonky2::mock::{mainpod::MockMainPod, signedpod::MockSignedPod}, - frontend::{containers::Dictionary, MainPod, SignedPod, Statement, TypedValue}, - middleware::PodId, + frontend::{MainPod, SignedPod, Statement}, + middleware::{containers::Dictionary, Key, PodId, Value}, }; #[derive(Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] #[schemars(title = "SignedPod")] pub struct SignedPodHelper { - entries: HashMap, + entries: HashMap, proof: String, pod_class: String, pod_type: String, @@ -30,10 +30,8 @@ impl TryFrom for SignedPod { return Err(anyhow::anyhow!("pod_type is not Mock")); } - let dict = Dictionary::new(helper.entries.clone())? - .middleware_dict() - .clone(); - let pod = MockSignedPod::deserialize(PodId(dict.commitment()), helper.proof, dict); + let dict = Dictionary::new(helper.entries.clone())?.clone(); + let pod = MockSignedPod::new(PodId(dict.commitment()), helper.proof, dict.kvs().clone()); Ok(SignedPod { pod: Box::new(pod), @@ -55,6 +53,7 @@ impl From for SignedPodHelper { #[derive(Serialize, Deserialize, JsonSchema)] #[schemars(title = "MainPod")] +#[serde(rename_all = "camelCase")] pub struct MainPodHelper { public_statements: Vec, proof: String, @@ -94,76 +93,29 @@ impl From for MainPodHelper { } } -pub fn serialize_i64(value: &i64, serializer: S) -> Result -where - S: serde::Serializer, -{ - serializer.serialize_str(&value.to_string()) -} - -pub fn deserialize_i64<'de, D>(deserializer: D) -> Result -where - D: serde::Deserializer<'de>, -{ - String::deserialize(deserializer)? - .parse() - .map_err(serde::de::Error::custom) -} - -// HashMap is not ordered, but we want our dictionaries to be ordered -// by key for serialization, so we turn HashMaps into BTreeMaps. -pub fn ordered_map( - value: &HashMap, - serializer: S, -) -> Result -where - S: Serializer, -{ - let ordered: BTreeMap<_, _> = value.iter().collect(); - ordered.serialize(serializer) -} - -pub fn transform_value_schema(schema: &mut Schema) { - let obj = schema.as_object_mut().unwrap(); - - // Get the oneOf array which contains our variant schemas - if let Some(one_of_container) = obj.get_mut("oneOf") { - if let Some(variants) = one_of_container.as_array_mut() { - // Add String variant (untagged) - variants.push(serde_json::json!({ - "type": "string" - })); - - // Add Boolean variant (untagged) - variants.push(serde_json::json!({ - "type": "boolean" - })); - - // Add Array variant (untagged) - variants.push(serde_json::json!({ - "type": "array", - "items": { - "$ref": "#/definitions/Value" - } - })); - } - } -} - #[cfg(test)] mod tests { - use anyhow::Result; - use schemars::generate::SchemaSettings; + use std::collections::HashSet; + use anyhow::Result; + // Pretty assertions give nicer diffs between expected and actual values + use pretty_assertions::assert_eq; + use schemars::schema_for; + + // use schemars::generate::SchemaSettings; use super::*; use crate::{ backends::plonky2::mock::{mainpod::MockProver, signedpod::MockSigner}, - examples::{zu_kyc_pod_builder, zu_kyc_sign_pod_builders}, - frontend::{ - containers::{Array, Dictionary, Set}, - SignedPodBuilder, + examples::{ + eth_dos_pod_builder, eth_friend_signed_pod_builder, zu_kyc_pod_builder, + zu_kyc_sign_pod_builders, + }, + frontend::SignedPodBuilder, + middleware::{ + self, + containers::{Array, Set}, + Params, TypedValue, }, - middleware::{self, Params}, }; #[test] @@ -174,34 +126,32 @@ mod tests { (TypedValue::Int(42), "{\"Int\":\"42\"}"), (TypedValue::Bool(true), "true"), ( - TypedValue::Array( - Array::new(vec![ - TypedValue::String("foo".to_string()), - TypedValue::Bool(false), - ]) - .unwrap(), - ), + TypedValue::Array(Array::new(vec!["foo".into(), false.into()]).unwrap()), "[\"foo\",false]", ), ( TypedValue::Dictionary( Dictionary::new(HashMap::from([ - ("foo".to_string(), TypedValue::Int(123)), - ("bar".to_string(), TypedValue::String("baz".to_string())), + // The set of valid keys is equal to the set of valid JSON keys + ("foo".into(), 123.into()), + // Empty strings are valid JSON keys + (("".into()), "baz".into()), + // Keys can contain whitespace + ((" hi".into()), false.into()), + // Keys can contain special characters + (("!@£$%^&&*()".into()), "".into()), + // Keys can contain _very_ special characters + (("\0".into()), "".into()), + // Keys can contain emojis + (("🥳".into()), "party time!".into()), ])) .unwrap(), ), - "{\"Dictionary\":{\"bar\":\"baz\",\"foo\":{\"Int\":\"123\"}}}", + "{\"Dictionary\":{\"\":\"baz\",\"\\u0000\":\"\",\" hi\":false,\"!@£$%^&&*()\":\"\",\"foo\":{\"Int\":\"123\"},\"🥳\":\"party time!\"}}", ), ( - TypedValue::Set( - Set::new(vec![ - TypedValue::String("foo".to_string()), - TypedValue::String("bar".to_string()), - ]) - .unwrap(), - ), - "{\"Set\":[\"foo\",\"bar\"]}", + TypedValue::Set(Set::new(HashSet::from(["foo".into(), "bar".into()])).unwrap()), + "{\"Set\":[\"bar\",\"foo\"]}", ), ]; @@ -209,14 +159,17 @@ mod tests { let serialized = serde_json::to_string(&value).unwrap(); assert_eq!(serialized, expected); let deserialized: TypedValue = serde_json::from_str(&serialized).unwrap(); - assert_eq!(value, deserialized); - let expected_deserialized: TypedValue = serde_json::from_str(&expected).unwrap(); + assert_eq!( + value, deserialized, + "value {:#?} should equal deserialized {:#?}", + value, deserialized + ); + let expected_deserialized: TypedValue = serde_json::from_str(expected).unwrap(); assert_eq!(value, expected_deserialized); } } - #[test] - fn test_signed_pod_serialization() { + fn build_signed_pod() -> Result { let mut signer = MockSigner { pk: "test".into() }; let mut builder = SignedPodBuilder::new(&Params::default()); builder.insert("name", "test"); @@ -224,44 +177,36 @@ mod tests { builder.insert("very_large_int", 1152921504606846976); builder.insert( "a_dict_containing_one_key", - TypedValue::Dictionary( - Dictionary::new(HashMap::from([ - ("foo".to_string(), TypedValue::Int(123)), - ( - "an_array_containing_three_ints".to_string(), - TypedValue::Array( - Array::new(vec![ - TypedValue::Int(1), - TypedValue::Int(2), - TypedValue::Int(3), - ]) - .unwrap(), - ), - ), - ( - "a_set_containing_two_strings".to_string(), - TypedValue::Set( - Set::new(vec![ - TypedValue::Array( - Array::new(vec![ - TypedValue::String("foo".to_string()), - TypedValue::String("bar".to_string()), - ]) - .unwrap(), - ), - TypedValue::String("baz".to_string()), - ]) - .unwrap(), - ), - ), - ])) - .unwrap(), - ), + Dictionary::new(HashMap::from([ + ("foo".into(), 123.into()), + ( + "an_array_containing_three_ints".into(), + Array::new(vec![1.into(), 2.into(), 3.into()]) + .unwrap() + .into(), + ), + ( + "a_set_containing_two_strings".into(), + Set::new(HashSet::from([ + Array::new(vec!["foo".into(), "bar".into()]).unwrap().into(), + "baz".into(), + ])) + .unwrap() + .into(), + ), + ])) + .unwrap(), ); let pod = builder.sign(&mut signer).unwrap(); + Ok(pod) + } - let serialized = serde_json::to_string(&pod).unwrap(); + #[test] + fn test_signed_pod_serialization() { + let pod = build_signed_pod().unwrap(); + + let serialized = serde_json::to_string_pretty(&pod).unwrap(); println!("serialized: {}", serialized); let deserialized: SignedPod = serde_json::from_str(&serialized).unwrap(); @@ -270,14 +215,11 @@ mod tests { assert_eq!(pod.id(), deserialized.id()) } - #[test] - fn test_main_pod_serialization() -> Result<()> { + fn build_zukyc_pod() -> Result { let params = middleware::Params::default(); - let sanctions_values = vec!["A343434340".into()]; - let sanction_set = TypedValue::Set(Set::new(sanctions_values)?); let (gov_id_builder, pay_stub_builder, sanction_list_builder) = - zu_kyc_sign_pod_builders(¶ms, &sanction_set); + zu_kyc_sign_pod_builders(¶ms); let mut signer = MockSigner { pk: "ZooGov".into(), }; @@ -295,8 +237,13 @@ mod tests { let mut prover = MockProver {}; let kyc_pod = kyc_builder.prove(&mut prover, ¶ms).unwrap(); + Ok(kyc_pod) + } - let serialized = serde_json::to_string(&kyc_pod).unwrap(); + #[test] + fn test_main_pod_serialization() -> Result<()> { + let kyc_pod = build_zukyc_pod()?; + let serialized = serde_json::to_string_pretty(&kyc_pod).unwrap(); println!("serialized: {}", serialized); let deserialized: MainPod = serde_json::from_str(&serialized).unwrap(); @@ -307,17 +254,70 @@ mod tests { Ok(()) } - #[test] - fn test_schema() { - let generator = SchemaSettings::draft07().into_generator(); - let mainpod_schema = generator.clone().into_root_schema_for::(); - let signedpod_schema = generator.into_root_schema_for::(); + fn build_ethdos_pod() -> Result { + let params = Params { + max_input_signed_pods: 3, + max_input_main_pods: 3, + max_statements: 31, + max_signed_pod_values: 8, + max_public_statements: 10, + max_statement_args: 6, + max_operation_args: 5, + max_custom_predicate_arity: 5, + max_custom_batch_size: 5, + max_custom_predicate_wildcards: 12, + ..Default::default() + }; - println!("{}", serde_json::to_string_pretty(&mainpod_schema).unwrap()); - println!( - "{}", - serde_json::to_string_pretty(&signedpod_schema).unwrap() - ); + let mut alice = MockSigner { pk: "Alice".into() }; + let bob = MockSigner { pk: "Bob".into() }; + let mut charlie = MockSigner { + pk: "Charlie".into(), + }; + + // Alice attests that she is ETH friends with Charlie and Charlie + // attests that he is ETH friends with Bob. + let alice_attestation = + eth_friend_signed_pod_builder(¶ms, charlie.pubkey().into()).sign(&mut alice)?; + let charlie_attestation = + eth_friend_signed_pod_builder(¶ms, bob.pubkey().into()).sign(&mut charlie)?; + + let mut prover = MockProver {}; + let alice_bob_ethdos = eth_dos_pod_builder( + ¶ms, + &alice_attestation, + &charlie_attestation, + &bob.pubkey().into(), + )? + .prove(&mut prover, ¶ms)?; + + Ok(alice_bob_ethdos) + } + + #[test] + // This tests that we can generate JSON Schemas for the MainPod and + // SignedPod types, and that we can validate real Signed and Main Pods + // against the schemas. + fn test_schema() { + let mainpod_schema = schema_for!(MainPodHelper); + let signedpod_schema = schema_for!(SignedPodHelper); + + let kyc_pod = build_zukyc_pod().unwrap(); + let signed_pod = build_signed_pod().unwrap(); + let ethdos_pod = build_ethdos_pod().unwrap(); + let mainpod_schema_value = serde_json::to_value(&mainpod_schema).unwrap(); + let signedpod_schema_value = serde_json::to_value(&signedpod_schema).unwrap(); + + let kyc_pod_value = serde_json::to_value(&kyc_pod).unwrap(); + let mainpod_valid = jsonschema::validate(&mainpod_schema_value, &kyc_pod_value); + assert!(mainpod_valid.is_ok(), "{:#?}", mainpod_valid); + + let signed_pod_value = serde_json::to_value(&signed_pod).unwrap(); + let signedpod_valid = jsonschema::validate(&signedpod_schema_value, &signed_pod_value); + assert!(signedpod_valid.is_ok(), "{:#?}", signedpod_valid); + + let ethdos_pod_value = serde_json::to_value(ðdos_pod).unwrap(); + let ethdos_pod_valid = jsonschema::validate(&mainpod_schema_value, ðdos_pod_value); + assert!(ethdos_pod_valid.is_ok(), "{:#?}", ethdos_pod_valid); } } -*/ diff --git a/src/middleware/basetypes.rs b/src/middleware/basetypes.rs index 5dcb0d5..66f1fda 100644 --- a/src/middleware/basetypes.rs +++ b/src/middleware/basetypes.rs @@ -55,15 +55,11 @@ use plonky2::{ hash::poseidon::PoseidonHash, plonk::config::Hasher, }; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; -use crate::middleware::{ - // serialization::{ - // deserialize_hash_tuple, deserialize_value_tuple, serialize_hash_tuple, - // serialize_value_tuple, - // }, - Params, - ToFields, -}; +use super::serialization::*; +use crate::middleware::{Params, ToFields}; /// F is the native field we use everywhere. Currently it's Goldilocks from plonky2 pub type F = GoldilocksField; @@ -75,16 +71,15 @@ pub const EMPTY_VALUE: RawValue = RawValue([F::ZERO, F::ZERO, F::ZERO, F::ZERO]) pub const SELF_ID_HASH: Hash = Hash([F::ONE, F::ZERO, F::ZERO, F::ZERO]); pub const EMPTY_HASH: Hash = Hash([F::ZERO, F::ZERO, F::ZERO, F::ZERO]); -#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq)] -// #[schemars(rename = "RawValue")] +#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] pub struct RawValue( - // #[serde( - // serialize_with = "serialize_value_tuple", - // deserialize_with = "deserialize_value_tuple" - // )] + #[serde( + serialize_with = "serialize_value_tuple", + deserialize_with = "deserialize_value_tuple" + )] // We know that Serde will serialize and deserialize this as a string, so we can // use the JsonSchema to validate the format. - // #[schemars(with = "String", regex(pattern = r"^[0-9a-fA-F]{64}$"))] + #[schemars(with = "String", regex(pattern = r"^[0-9a-fA-F]{64}$"))] pub [F; VALUE_SIZE], ); @@ -152,13 +147,13 @@ impl fmt::Display for RawValue { } } -#[derive(Clone, Copy, Debug, Default, Hash, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, Default, Hash, Eq, PartialEq, Serialize, Deserialize, JsonSchema)] pub struct Hash( - // #[serde( - // serialize_with = "serialize_hash_tuple", - // deserialize_with = "deserialize_hash_tuple" - // )] - // #[schemars(with = "String", regex(pattern = r"^[0-9a-fA-F]{64}$"))] + #[serde( + serialize_with = "serialize_hash_tuple", + deserialize_with = "deserialize_hash_tuple" + )] + #[schemars(with = "String", regex(pattern = r"^[0-9a-fA-F]{64}$"))] pub [F; HASH_SIZE], ); diff --git a/src/middleware/containers.rs b/src/middleware/containers.rs index 4c819ac..9b1b569 100644 --- a/src/middleware/containers.rs +++ b/src/middleware/containers.rs @@ -3,7 +3,10 @@ use std::collections::{HashMap, HashSet}; /// This file implements the types defined at /// https://0xparc.github.io/pod2/values.html#dictionary-array-set . use anyhow::{anyhow, Result}; +use schemars::JsonSchema; +use serde::{Deserialize, Deserializer, Serialize}; +use super::serialization::{ordered_map, ordered_set}; #[cfg(feature = "backend_plonky2")] use crate::backends::plonky2::primitives::merkletree::{MerkleProof, MerkleTree}; use crate::{ @@ -14,9 +17,12 @@ use crate::{ /// Dictionary: the user original keys and values are hashed to be used in the leaf. /// leaf.key=hash(original_key) /// leaf.value=hash(original_value) -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] +#[serde(transparent)] pub struct Dictionary { + #[serde(skip)] mt: MerkleTree, + #[serde(serialize_with = "ordered_map")] kvs: HashMap, } @@ -76,12 +82,36 @@ impl PartialEq for Dictionary { } impl Eq for Dictionary {} +impl<'de> Deserialize<'de> for Dictionary { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let kvs: HashMap = HashMap::deserialize(deserializer)?; + Dictionary::new(kvs).map_err(serde::de::Error::custom) + } +} + +impl JsonSchema for Dictionary { + fn schema_name() -> String { + "Dictionary".to_string() + } + + fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema { + // Just use the schema of HashMap since that's what we're actually serializing + >::json_schema(gen) + } +} + /// Set: the value field of the leaf is unused, and the key contains the hash of the element. /// leaf.key=hash(original_value) /// leaf.value=0 -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] +#[serde(transparent)] pub struct Set { + #[serde(skip)] mt: MerkleTree, + #[serde(serialize_with = "ordered_set")] set: HashSet, } @@ -134,12 +164,38 @@ impl PartialEq for Set { } impl Eq for Set {} +impl<'de> Deserialize<'de> for Set { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + // Deserialize the set directly + let set: HashSet = HashSet::deserialize(deserializer)?; + + // Create a new Set using the set field + Set::new(set).map_err(serde::de::Error::custom) + } +} + +impl JsonSchema for Set { + fn schema_name() -> String { + "Set".to_string() + } + + fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema { + // Just use the schema of HashSet since that's what we're actually serializing + >::json_schema(gen) + } +} + /// Array: the elements are placed at the value field of each leaf, and the key field is just the /// array index (integer). /// leaf.key=i /// leaf.value=original_value -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize)] +#[serde(transparent)] pub struct Array { + #[serde(skip)] mt: MerkleTree, array: Vec, } @@ -190,3 +246,24 @@ impl PartialEq for Array { } } impl Eq for Array {} + +impl<'de> Deserialize<'de> for Array { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let array: Vec = Vec::deserialize(deserializer)?; + Array::new(array).map_err(serde::de::Error::custom) + } +} + +impl JsonSchema for Array { + fn schema_name() -> String { + "Array".to_string() + } + + fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema { + // Just use the schema of Vec since that's what we're actually serializing + >::json_schema(gen) + } +} diff --git a/src/middleware/custom.rs b/src/middleware/custom.rs index 63832a9..4ab3d57 100644 --- a/src/middleware/custom.rs +++ b/src/middleware/custom.rs @@ -2,16 +2,14 @@ use std::{fmt, iter, sync::Arc}; use anyhow::{anyhow, Result}; use plonky2::field::types::Field; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; -// use schemars::JsonSchema; - -// use serde::{Deserialize, Serialize}; -use crate::{ - middleware::HASH_SIZE, - middleware::{hash_fields, Hash, Key, NativePredicate, Params, ToFields, Value, F}, +use crate::middleware::{ + hash_fields, Hash, Key, NativePredicate, Params, ToFields, Value, F, HASH_SIZE, }; -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)] pub struct Wildcard { pub name: String, pub index: usize, @@ -35,7 +33,8 @@ impl ToFields for Wildcard { } } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)] +#[serde(tag = "type", content = "value")] pub enum KeyOrWildcard { Key(Key), Wildcard(Wildcard), @@ -62,7 +61,8 @@ impl ToFields for KeyOrWildcard { } } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)] +#[serde(tag = "type", content = "value")] pub enum StatementTmplArg { None, Literal(Value), @@ -126,7 +126,7 @@ impl fmt::Display for StatementTmplArg { } /// Statement Template for a Custom Predicate -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)] pub struct StatementTmpl { pub pred: Predicate, pub args: Vec, @@ -178,7 +178,8 @@ impl ToFields for StatementTmpl { } } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] /// NOTE: fields are not public (outside of crate) to enforce the struct instantiation through /// the `::and/or` methods, which performs checks on the values. pub struct CustomPredicate { @@ -278,7 +279,7 @@ impl fmt::Display for CustomPredicate { } } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)] pub struct CustomPredicateBatch { pub name: String, pub predicates: Vec, @@ -315,7 +316,7 @@ impl CustomPredicateBatch { } } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)] pub struct CustomPredicateRef { pub batch: Arc, pub index: usize, @@ -330,8 +331,8 @@ impl CustomPredicateRef { } } -#[derive(Clone, Debug, PartialEq)] -// #[serde(tag = "type", content = "value")] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)] +#[serde(tag = "type", content = "value")] pub enum Predicate { Native(NativePredicate), BatchSelf(usize), diff --git a/src/middleware/mod.rs b/src/middleware/mod.rs index 59a8cb6..5c776bd 100644 --- a/src/middleware/mod.rs +++ b/src/middleware/mod.rs @@ -10,6 +10,8 @@ use std::{ use anyhow::anyhow; use containers::{Array, Dictionary, Set}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; pub mod containers; mod custom; mod operation; @@ -22,8 +24,7 @@ pub use basetypes::*; pub use custom::*; use dyn_clone::DynClone; pub use operation::*; -// use schemars::JsonSchema; -// use serde::{Deserialize, Serialize}; +use serialization::*; pub use statement::*; use crate::backends::plonky2::primitives::merkletree::MerkleProof; @@ -31,7 +32,7 @@ use crate::backends::plonky2::primitives::merkletree::MerkleProof; pub const SELF: PodId = PodId(SELF_ID_HASH); // TODO: Move all value-related types to to `value.rs` -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)] // TODO #[schemars(transform = serialization::transform_value_schema)] pub enum TypedValue { // Serde cares about the order of the enum variants, with untagged variants @@ -49,21 +50,18 @@ pub enum TypedValue { Set(Set), Dictionary(Dictionary), Int( - // TODO #[serde(serialize_with = "serialize_i64", deserialize_with = "deserialize_i64")] + #[serde(serialize_with = "serialize_i64", deserialize_with = "deserialize_i64")] // #[schemars(with = "String", regex(pattern = r"^\d+$"))] i64, ), // Uses the serialization for middleware::Value: Raw(RawValue), // UNTAGGED TYPES: - // #[serde(untagged)] - // #[schemars(skip)] + #[serde(untagged)] Array(Array), - // #[serde(untagged)] - // #[schemars(skip)] + #[serde(untagged)] String(String), - // #[serde(untagged)] - // #[schemars(skip)] + #[serde(untagged)] Bool(bool), } @@ -176,6 +174,106 @@ impl From<&TypedValue> for RawValue { } } +// Schemars/JsonSchema can't handle Serde's "untagged" variants. +// Instead, we have to implement schema generation directly. It's not as +// complicated as it looks, though. +// We have to generate schemas for each of the variants, and then combine them +// into a single schema using the `anyOf` keyword. +// If we add a new variant, we will have to update this function. +impl JsonSchema for TypedValue { + fn schema_name() -> String { + "TypedValue".to_string() + } + + fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema { + use schemars::schema::{InstanceType, Schema, SchemaObject, SingleOrVec}; + + let set_schema = schemars::schema::SchemaObject { + instance_type: Some(SingleOrVec::Single(Box::new(InstanceType::Object))), + object: Some(Box::new(schemars::schema::ObjectValidation { + properties: [("Set".to_string(), gen.subschema_for::())] + .into_iter() + .collect(), + required: ["Set".to_string()].into_iter().collect(), + ..Default::default() + })), + ..Default::default() + }; + + let dictionary_schema = schemars::schema::SchemaObject { + instance_type: Some(SingleOrVec::Single(Box::new(InstanceType::Object))), + object: Some(Box::new(schemars::schema::ObjectValidation { + properties: [("Dictionary".to_string(), gen.subschema_for::())] + .into_iter() + .collect(), + required: ["Dictionary".to_string()].into_iter().collect(), + ..Default::default() + })), + ..Default::default() + }; + + // Int is serialized/deserialized as a tagged string + let int_schema = schemars::schema::SchemaObject { + instance_type: Some(SingleOrVec::Single(Box::new(InstanceType::Object))), + object: Some(Box::new(schemars::schema::ObjectValidation { + properties: [( + "Int".to_string(), + Schema::Object(SchemaObject { + instance_type: Some(SingleOrVec::Single(Box::new(InstanceType::String))), + metadata: Some(Box::new(schemars::schema::Metadata { + description: Some("An i64 represented as a string.".to_string()), + ..Default::default() + })), + ..Default::default() + }), + )] + .into_iter() + .collect(), + required: ["Int".to_string()].into_iter().collect(), + ..Default::default() + })), + ..Default::default() + }; + + let raw_schema = schemars::schema::SchemaObject { + instance_type: Some(SingleOrVec::Single(Box::new(InstanceType::Object))), + object: Some(Box::new(schemars::schema::ObjectValidation { + properties: [("Raw".to_string(), gen.subschema_for::())] + .into_iter() + .collect(), + required: ["Raw".to_string()].into_iter().collect(), + ..Default::default() + })), + ..Default::default() + }; + + // This is the part that Schemars can't generate automatically: + let untagged_array_schema = gen.subschema_for::(); + let untagged_string_schema = gen.subschema_for::(); + let untagged_bool_schema = gen.subschema_for::(); + + Schema::Object(SchemaObject { + subschemas: Some(Box::new(schemars::schema::SubschemaValidation { + any_of: Some(vec![ + Schema::Object(set_schema), + Schema::Object(dictionary_schema), + Schema::Object(int_schema), + Schema::Object(raw_schema), + untagged_array_schema, + untagged_string_schema, + untagged_bool_schema, + ]), + ..Default::default() + })), + metadata: Some(Box::new(schemars::schema::Metadata { + description: Some("Represents various POD value types. Array, String, and Bool variants are represented untagged in JSON.".to_string()), + ..Default::default() + })), + ..Default::default() + }) + } +} + #[derive(Clone, Debug)] pub struct Value { // The `TypedValue` is under `Arc` so that cloning a `Value` is cheap. @@ -183,6 +281,37 @@ pub struct Value { raw: RawValue, } +// Values are serialized as their TypedValue. +impl Serialize for Value { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + self.typed.serialize(serializer) + } +} + +impl<'de> Deserialize<'de> for Value { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let typed = TypedValue::deserialize(deserializer)?; + Ok(Value::new(typed)) + } +} + +impl JsonSchema for Value { + fn schema_name() -> String { + "Value".to_string() + } + + fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema { + // Just use the schema of TypedValue since that's what we're actually serializing + ::json_schema(gen) + } +} + impl PartialEq for Value { fn eq(&self, other: &Self) -> bool { self.raw == other.raw @@ -336,7 +465,44 @@ impl From for RawValue { } } -#[derive(Clone, Debug, PartialEq, Eq, Hash)] +// When serializing a Key, we serialize only the name field, and not the hash. +// We can't directly tell Serde to render the whole struct as a string, so we +// implement our own serialization. It's important that if we change the +// structure of the Key struct, we update this implementation. +impl Serialize for Key { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + self.name.serialize(serializer) + } +} + +impl<'de> Deserialize<'de> for Key { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let name = String::deserialize(deserializer)?; + Ok(Key::new(name)) + } +} + +// As per the above, we implement custom serialization for the Key type, and +// Schemars can't automatically generate a schema for it. Instead, we tell it +// to use the standard String schema. +impl JsonSchema for Key { + fn schema_name() -> String { + "Key".to_string() + } + + fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema { + ::json_schema(gen) + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] pub struct AnchoredKey { pub pod_id: PodId, pub key: Key, @@ -364,7 +530,7 @@ where } } -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default, Serialize, Deserialize, JsonSchema)] pub struct PodId(pub Hash); impl ToFields for PodId { @@ -394,7 +560,8 @@ impl fmt::Display for PodType { } } -#[derive(Clone, Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] pub struct Params { pub max_input_signed_pods: usize, pub max_input_main_pods: usize, diff --git a/src/middleware/operation.rs b/src/middleware/operation.rs index 22f5d34..df97c1b 100644 --- a/src/middleware/operation.rs +++ b/src/middleware/operation.rs @@ -3,8 +3,8 @@ use std::{fmt, iter, sync::Arc}; use anyhow::{anyhow, Result}; use log::error; use plonky2::field::types::Field; +use serde::{Deserialize, Serialize}; -// use serde::{Deserialize, Serialize}; use crate::{ backends::plonky2::primitives::merkletree::MerkleProof, middleware::{ @@ -14,7 +14,7 @@ use crate::{ }, }; -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub enum OperationType { Native(NativeOperation), Custom(CustomPredicateRef), @@ -54,7 +54,7 @@ impl ToFields for OperationType { } } -#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] pub enum NativeOperation { None = 0, NewEntry = 1, diff --git a/src/middleware/serialization.rs b/src/middleware/serialization.rs index eafa920..a2ead6f 100644 --- a/src/middleware/serialization.rs +++ b/src/middleware/serialization.rs @@ -1,8 +1,9 @@ -// TODO: Reenable -/* -use plonky2::field::types::Field; -use serde::Deserialize; +use std::collections::{HashMap, HashSet}; +use plonky2::field::types::Field; +use serde::{ser::SerializeSeq, Deserialize, Serialize, Serializer}; + +use super::{Key, Value}; use crate::middleware::{F, HASH_SIZE, VALUE_SIZE}; fn serialize_field_tuple( @@ -69,4 +70,60 @@ where { deserialize_field_tuple::(deserializer) } -*/ + +pub fn serialize_i64(value: &i64, serializer: S) -> Result +where + S: serde::Serializer, +{ + serializer.serialize_str(&value.to_string()) +} + +pub fn deserialize_i64<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + String::deserialize(deserializer)? + .parse() + .map_err(serde::de::Error::custom) +} + +// In order to serialize a Dictionary consistently, we want to order the +// key-value pairs by the key's name field. This has no effect on the hashes +// of the keys and therefore on the Merkle tree, but it makes the serialized +// output deterministic. +pub fn ordered_map( + value: &HashMap, + serializer: S, +) -> Result +where + S: Serializer, +{ + // Convert to Vec and sort by the key's name field + let mut pairs: Vec<_> = value.iter().collect(); + pairs.sort_by(|(k1, _), (k2, _)| k1.name.cmp(&k2.name)); + + // Serialize as a map + use serde::ser::SerializeMap; + let mut map = serializer.serialize_map(Some(pairs.len()))?; + for (k, v) in pairs { + map.serialize_entry(k, v)?; + } + map.end() +} + +// Sets are serialized as sequences of elements, which are not ordered by +// default. We want to serialize them in a deterministic way, and we can +// achieve this by sorting the elements. This takes advantage of the fact that +// Value implements Ord. +pub fn ordered_set(value: &HashSet, serializer: S) -> Result +where + S: Serializer, +{ + let mut set = serializer.serialize_seq(Some(value.len()))?; + let mut sorted_values: Vec<&Value> = value.iter().collect(); + sorted_values.sort(); + for v in sorted_values { + set.serialize_element(v)?; + } + set.end() +} diff --git a/src/middleware/statement.rs b/src/middleware/statement.rs index a648f65..f3049e8 100644 --- a/src/middleware/statement.rs +++ b/src/middleware/statement.rs @@ -2,8 +2,8 @@ use std::{fmt, iter}; use anyhow::{anyhow, Result}; use plonky2::field::types::Field; -// use schemars::JsonSchema; -// use serde::{Deserialize, Serialize}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; use strum_macros::FromRepr; use crate::middleware::{ @@ -20,7 +20,7 @@ pub const STATEMENT_ARG_F_LEN: usize = 8; pub const OPERATION_ARG_F_LEN: usize = 1; pub const OPERATION_AUX_F_LEN: usize = 1; -#[derive(Clone, Copy, Debug, FromRepr, PartialEq, Eq, Hash)] +#[derive(Clone, Copy, Debug, FromRepr, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)] pub enum NativePredicate { None = 0, ValueOf = 1, @@ -49,7 +49,7 @@ impl ToFields for NativePredicate { } } -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)] pub enum WildcardValue { PodId(PodId), Key(Key), @@ -83,7 +83,8 @@ impl ToFields for WildcardValue { } /// Type encapsulating statements with their associated arguments. -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)] +#[serde(tag = "predicate", content = "args")] pub enum Statement { None, ValueOf(AnchoredKey, Value), @@ -275,7 +276,7 @@ impl fmt::Display for Statement { } /// Statement argument type. Useful for statement decompositions. -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub enum StatementArg { None, Literal(Value),