Re-implement serialization (#201)

* Serialization tests now pass again

* Tidy up and test more edge-cases

* Use attributes rather than custom serializer for arrays

* Add JSON Schema support

* Tests for JSON Schema generation and validation

* Add comments

* Support custom predicates

* Clippy fixes

* Make deserialization/constructor functions pub(crate)
This commit is contained in:
Rob Knight 2025-04-22 04:19:20 -07:00 committed by GitHub
parent 26a6b2d143
commit bf6d8aee8b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 554 additions and 255 deletions

View file

@ -55,15 +55,11 @@ use plonky2::{
hash::poseidon::PoseidonHash,
plonk::config::Hasher,
};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use crate::middleware::{
// serialization::{
// deserialize_hash_tuple, deserialize_value_tuple, serialize_hash_tuple,
// serialize_value_tuple,
// },
Params,
ToFields,
};
use super::serialization::*;
use crate::middleware::{Params, ToFields};
/// F is the native field we use everywhere. Currently it's Goldilocks from plonky2
pub type F = GoldilocksField;
@ -75,16 +71,15 @@ pub const EMPTY_VALUE: RawValue = RawValue([F::ZERO, F::ZERO, F::ZERO, F::ZERO])
pub const SELF_ID_HASH: Hash = Hash([F::ONE, F::ZERO, F::ZERO, F::ZERO]);
pub const EMPTY_HASH: Hash = Hash([F::ZERO, F::ZERO, F::ZERO, F::ZERO]);
#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq)]
// #[schemars(rename = "RawValue")]
#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
pub struct RawValue(
// #[serde(
// serialize_with = "serialize_value_tuple",
// deserialize_with = "deserialize_value_tuple"
// )]
#[serde(
serialize_with = "serialize_value_tuple",
deserialize_with = "deserialize_value_tuple"
)]
// We know that Serde will serialize and deserialize this as a string, so we can
// use the JsonSchema to validate the format.
// #[schemars(with = "String", regex(pattern = r"^[0-9a-fA-F]{64}$"))]
#[schemars(with = "String", regex(pattern = r"^[0-9a-fA-F]{64}$"))]
pub [F; VALUE_SIZE],
);
@ -152,13 +147,13 @@ impl fmt::Display for RawValue {
}
}
#[derive(Clone, Copy, Debug, Default, Hash, Eq, PartialEq)]
#[derive(Clone, Copy, Debug, Default, Hash, Eq, PartialEq, Serialize, Deserialize, JsonSchema)]
pub struct Hash(
// #[serde(
// serialize_with = "serialize_hash_tuple",
// deserialize_with = "deserialize_hash_tuple"
// )]
// #[schemars(with = "String", regex(pattern = r"^[0-9a-fA-F]{64}$"))]
#[serde(
serialize_with = "serialize_hash_tuple",
deserialize_with = "deserialize_hash_tuple"
)]
#[schemars(with = "String", regex(pattern = r"^[0-9a-fA-F]{64}$"))]
pub [F; HASH_SIZE],
);

View file

@ -3,7 +3,10 @@ use std::collections::{HashMap, HashSet};
/// This file implements the types defined at
/// https://0xparc.github.io/pod2/values.html#dictionary-array-set .
use anyhow::{anyhow, Result};
use schemars::JsonSchema;
use serde::{Deserialize, Deserializer, Serialize};
use super::serialization::{ordered_map, ordered_set};
#[cfg(feature = "backend_plonky2")]
use crate::backends::plonky2::primitives::merkletree::{MerkleProof, MerkleTree};
use crate::{
@ -14,9 +17,12 @@ use crate::{
/// Dictionary: the user original keys and values are hashed to be used in the leaf.
/// leaf.key=hash(original_key)
/// leaf.value=hash(original_value)
#[derive(Clone, Debug)]
#[derive(Clone, Debug, Serialize)]
#[serde(transparent)]
pub struct Dictionary {
#[serde(skip)]
mt: MerkleTree,
#[serde(serialize_with = "ordered_map")]
kvs: HashMap<Key, Value>,
}
@ -76,12 +82,36 @@ impl PartialEq for Dictionary {
}
impl Eq for Dictionary {}
impl<'de> Deserialize<'de> for Dictionary {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let kvs: HashMap<Key, Value> = HashMap::deserialize(deserializer)?;
Dictionary::new(kvs).map_err(serde::de::Error::custom)
}
}
impl JsonSchema for Dictionary {
    /// Name under which this schema is registered.
    fn schema_name() -> String {
        String::from("Dictionary")
    }

    /// A `Dictionary` is serialized transparently as its key/value map, so the
    /// schema is simply the one generated for `HashMap<Key, Value>`.
    fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
        <HashMap<Key, Value> as JsonSchema>::json_schema(gen)
    }
}
/// Set: the value field of the leaf is unused, and the key contains the hash of the element.
/// leaf.key=hash(original_value)
/// leaf.value=0
#[derive(Clone, Debug)]
#[derive(Clone, Debug, Serialize)]
#[serde(transparent)]
pub struct Set {
#[serde(skip)]
mt: MerkleTree,
#[serde(serialize_with = "ordered_set")]
set: HashSet<Value>,
}
@ -134,12 +164,38 @@ impl PartialEq for Set {
}
impl Eq for Set {}
impl<'de> Deserialize<'de> for Set {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
// Deserialize the set directly
let set: HashSet<Value> = HashSet::deserialize(deserializer)?;
// Create a new Set using the set field
Set::new(set).map_err(serde::de::Error::custom)
}
}
impl JsonSchema for Set {
    /// Name under which this schema is registered.
    fn schema_name() -> String {
        String::from("Set")
    }

    /// A `Set` is serialized transparently as its elements, so the schema is
    /// the one generated for `HashSet<Value>`.
    fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
        <HashSet<Value> as JsonSchema>::json_schema(gen)
    }
}
/// Array: the elements are placed at the value field of each leaf, and the key field is just the
/// array index (integer).
/// leaf.key=i
/// leaf.value=original_value
#[derive(Clone, Debug)]
#[derive(Clone, Debug, Serialize)]
#[serde(transparent)]
pub struct Array {
#[serde(skip)]
mt: MerkleTree,
array: Vec<Value>,
}
@ -190,3 +246,24 @@ impl PartialEq for Array {
}
}
impl Eq for Array {}
impl<'de> Deserialize<'de> for Array {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let array: Vec<Value> = Vec::deserialize(deserializer)?;
Array::new(array).map_err(serde::de::Error::custom)
}
}
impl JsonSchema for Array {
    /// Name under which this schema is registered.
    fn schema_name() -> String {
        String::from("Array")
    }

    /// An `Array` is serialized transparently as its elements, so the schema
    /// is the one generated for `Vec<Value>`.
    fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
        <Vec<Value> as JsonSchema>::json_schema(gen)
    }
}

View file

@ -2,16 +2,14 @@ use std::{fmt, iter, sync::Arc};
use anyhow::{anyhow, Result};
use plonky2::field::types::Field;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
// use schemars::JsonSchema;
// use serde::{Deserialize, Serialize};
use crate::{
middleware::HASH_SIZE,
middleware::{hash_fields, Hash, Key, NativePredicate, Params, ToFields, Value, F},
use crate::middleware::{
hash_fields, Hash, Key, NativePredicate, Params, ToFields, Value, F, HASH_SIZE,
};
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
pub struct Wildcard {
pub name: String,
pub index: usize,
@ -35,7 +33,8 @@ impl ToFields for Wildcard {
}
}
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
#[serde(tag = "type", content = "value")]
pub enum KeyOrWildcard {
Key(Key),
Wildcard(Wildcard),
@ -62,7 +61,8 @@ impl ToFields for KeyOrWildcard {
}
}
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
#[serde(tag = "type", content = "value")]
pub enum StatementTmplArg {
None,
Literal(Value),
@ -126,7 +126,7 @@ impl fmt::Display for StatementTmplArg {
}
/// Statement Template for a Custom Predicate
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
pub struct StatementTmpl {
pub pred: Predicate,
pub args: Vec<StatementTmplArg>,
@ -178,7 +178,8 @@ impl ToFields for StatementTmpl {
}
}
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "camelCase")]
/// NOTE: fields are not public (outside of crate) to enforce the struct instantiation through
/// the `::and/or` methods, which performs checks on the values.
pub struct CustomPredicate {
@ -278,7 +279,7 @@ impl fmt::Display for CustomPredicate {
}
}
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
pub struct CustomPredicateBatch {
pub name: String,
pub predicates: Vec<CustomPredicate>,
@ -315,7 +316,7 @@ impl CustomPredicateBatch {
}
}
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
pub struct CustomPredicateRef {
pub batch: Arc<CustomPredicateBatch>,
pub index: usize,
@ -330,8 +331,8 @@ impl CustomPredicateRef {
}
}
#[derive(Clone, Debug, PartialEq)]
// #[serde(tag = "type", content = "value")]
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
#[serde(tag = "type", content = "value")]
pub enum Predicate {
Native(NativePredicate),
BatchSelf(usize),

View file

@ -10,6 +10,8 @@ use std::{
use anyhow::anyhow;
use containers::{Array, Dictionary, Set};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
pub mod containers;
mod custom;
mod operation;
@ -22,8 +24,7 @@ pub use basetypes::*;
pub use custom::*;
use dyn_clone::DynClone;
pub use operation::*;
// use schemars::JsonSchema;
// use serde::{Deserialize, Serialize};
use serialization::*;
pub use statement::*;
use crate::backends::plonky2::primitives::merkletree::MerkleProof;
@ -31,7 +32,7 @@ use crate::backends::plonky2::primitives::merkletree::MerkleProof;
pub const SELF: PodId = PodId(SELF_ID_HASH);
// TODO: Move all value-related types to `value.rs`
#[derive(Clone, Debug)]
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
// TODO #[schemars(transform = serialization::transform_value_schema)]
pub enum TypedValue {
// Serde cares about the order of the enum variants, with untagged variants
@ -49,21 +50,18 @@ pub enum TypedValue {
Set(Set),
Dictionary(Dictionary),
Int(
// TODO #[serde(serialize_with = "serialize_i64", deserialize_with = "deserialize_i64")]
#[serde(serialize_with = "serialize_i64", deserialize_with = "deserialize_i64")]
// #[schemars(with = "String", regex(pattern = r"^\d+$"))]
i64,
),
// Uses the serialization for middleware::Value:
Raw(RawValue),
// UNTAGGED TYPES:
// #[serde(untagged)]
// #[schemars(skip)]
#[serde(untagged)]
Array(Array),
// #[serde(untagged)]
// #[schemars(skip)]
#[serde(untagged)]
String(String),
// #[serde(untagged)]
// #[schemars(skip)]
#[serde(untagged)]
Bool(bool),
}
@ -176,6 +174,106 @@ impl From<&TypedValue> for RawValue {
}
}
// Schemars cannot derive a schema for an enum that uses Serde's "untagged"
// variants, so the schema for `TypedValue` is assembled by hand: one schema
// per variant, combined with `anyOf`.
// Adding a new variant to `TypedValue` requires updating this impl.
impl JsonSchema for TypedValue {
    /// Name under which this schema is registered.
    fn schema_name() -> String {
        String::from("TypedValue")
    }

    /// Builds the `anyOf` schema covering every `TypedValue` variant.
    ///
    /// `Set`, `Dictionary`, `Int` and `Raw` are described as objects with a
    /// single required property named after the variant; `Array`, `String`
    /// and `Bool` are untagged and use the payload schema directly.
    fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
        use schemars::schema::{
            InstanceType, Metadata, ObjectValidation, Schema, SchemaObject, SingleOrVec,
            SubschemaValidation,
        };

        /// Wraps a description string in the metadata shape `SchemaObject` expects.
        fn described(text: &str) -> Option<Box<Metadata>> {
            Some(Box::new(Metadata {
                description: Some(text.to_string()),
                ..Default::default()
            }))
        }

        /// Schema for an externally tagged variant: an object whose only
        /// (required) property is `tag`, holding `payload`.
        fn tagged(tag: &str, payload: Schema) -> Schema {
            Schema::Object(SchemaObject {
                instance_type: Some(SingleOrVec::Single(Box::new(InstanceType::Object))),
                object: Some(Box::new(ObjectValidation {
                    properties: std::iter::once((tag.to_string(), payload)).collect(),
                    required: std::iter::once(tag.to_string()).collect(),
                    ..Default::default()
                })),
                ..Default::default()
            })
        }

        // Subschemas are requested in the same order as the variants are
        // listed below, so the generator is driven in a fixed sequence.
        let set_variant = tagged("Set", gen.subschema_for::<Set>());
        let dictionary_variant = tagged("Dictionary", gen.subschema_for::<Dictionary>());

        // Int is serialized/deserialized as a tagged string.
        let int_variant = tagged(
            "Int",
            Schema::Object(SchemaObject {
                instance_type: Some(SingleOrVec::Single(Box::new(InstanceType::String))),
                metadata: described("An i64 represented as a string."),
                ..Default::default()
            }),
        );

        let raw_variant = tagged("Raw", gen.subschema_for::<RawValue>());

        // The untagged variants are the part Schemars can't generate on its own.
        let array_variant = gen.subschema_for::<Array>();
        let string_variant = gen.subschema_for::<String>();
        let bool_variant = gen.subschema_for::<bool>();

        let alternatives = vec![
            set_variant,
            dictionary_variant,
            int_variant,
            raw_variant,
            array_variant,
            string_variant,
            bool_variant,
        ];

        Schema::Object(SchemaObject {
            subschemas: Some(Box::new(SubschemaValidation {
                any_of: Some(alternatives),
                ..Default::default()
            })),
            metadata: described("Represents various POD value types. Array, String, and Bool variants are represented untagged in JSON."),
            ..Default::default()
        })
    }
}
#[derive(Clone, Debug)]
pub struct Value {
// The `TypedValue` is under `Arc` so that cloning a `Value` is cheap.
@ -183,6 +281,37 @@ pub struct Value {
raw: RawValue,
}
// Values are serialized as their TypedValue.
impl Serialize for Value {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
self.typed.serialize(serializer)
}
}
impl<'de> Deserialize<'de> for Value {
    /// Reads a `TypedValue` from the input and wraps it with `Value::new`.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        TypedValue::deserialize(deserializer).map(Value::new)
    }
}
impl JsonSchema for Value {
    /// Name under which this schema is registered.
    fn schema_name() -> String {
        String::from("Value")
    }

    /// A `Value` is serialized as its `TypedValue`, so it shares that schema.
    fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
        <TypedValue as JsonSchema>::json_schema(gen)
    }
}
impl PartialEq for Value {
fn eq(&self, other: &Self) -> bool {
self.raw == other.raw
@ -336,7 +465,44 @@ impl From<Key> for RawValue {
}
}
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
// A `Key` is serialized as just its `name`; the hash is not written out.
// Serde has no attribute for rendering a whole struct as a bare string, hence
// the hand-written impl. If the structure of `Key` changes, this impl must be
// revisited.
impl Serialize for Key {
    /// Emits the key's name as a plain string.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        serializer.serialize_str(&self.name)
    }
}
impl<'de> Deserialize<'de> for Key {
    /// Reads a string from the input and builds the `Key` from it via
    /// `Key::new`.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        String::deserialize(deserializer).map(Key::new)
    }
}
// `Key` has hand-written (de)serialization that represents it as a string, so
// Schemars cannot derive its schema. The standard `String` schema is used
// instead.
impl JsonSchema for Key {
    /// Name under which this schema is registered.
    fn schema_name() -> String {
        String::from("Key")
    }

    /// Reuses the schema generated for `String`.
    fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
        <String as JsonSchema>::json_schema(gen)
    }
}
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "camelCase")]
pub struct AnchoredKey {
pub pod_id: PodId,
pub key: Key,
@ -364,7 +530,7 @@ where
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Default, Serialize, Deserialize, JsonSchema)]
pub struct PodId(pub Hash);
impl ToFields for PodId {
@ -394,7 +560,8 @@ impl fmt::Display for PodType {
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Params {
pub max_input_signed_pods: usize,
pub max_input_main_pods: usize,

View file

@ -3,8 +3,8 @@ use std::{fmt, iter, sync::Arc};
use anyhow::{anyhow, Result};
use log::error;
use plonky2::field::types::Field;
use serde::{Deserialize, Serialize};
// use serde::{Deserialize, Serialize};
use crate::{
backends::plonky2::primitives::merkletree::MerkleProof,
middleware::{
@ -14,7 +14,7 @@ use crate::{
},
};
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum OperationType {
Native(NativeOperation),
Custom(CustomPredicateRef),
@ -54,7 +54,7 @@ impl ToFields for OperationType {
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum NativeOperation {
None = 0,
NewEntry = 1,

View file

@ -1,8 +1,9 @@
// TODO: Reenable
/*
use plonky2::field::types::Field;
use serde::Deserialize;
use std::collections::{HashMap, HashSet};
use plonky2::field::types::Field;
use serde::{ser::SerializeSeq, Deserialize, Serialize, Serializer};
use super::{Key, Value};
use crate::middleware::{F, HASH_SIZE, VALUE_SIZE};
fn serialize_field_tuple<S, const N: usize>(
@ -69,4 +70,60 @@ where
{
deserialize_field_tuple::<D, VALUE_SIZE>(deserializer)
}
*/
/// Serializes an `i64` as its decimal string representation.
///
/// Intended for use with `#[serde(serialize_with = "serialize_i64")]`.
///
/// `collect_str` is documented as equivalent to
/// `serialize_str(&value.to_string())`, but lets serializers that support it
/// write the digits directly instead of going through a temporary `String`.
pub fn serialize_i64<S>(value: &i64, serializer: S) -> Result<S::Ok, S::Error>
where
    S: serde::Serializer,
{
    serializer.collect_str(value)
}
pub fn deserialize_i64<'de, D>(deserializer: D) -> Result<i64, D::Error>
where
D: serde::Deserializer<'de>,
{
String::deserialize(deserializer)?
.parse()
.map_err(serde::de::Error::custom)
}
/// Serializes a `HashMap<Key, V>` as a map whose entries are ordered by the
/// key's `name` field.
///
/// `HashMap` iteration order is unspecified; sorting makes the serialized
/// output deterministic. The ordering has no effect on the hashes of the keys
/// and therefore none on the Merkle tree.
pub fn ordered_map<S, V: Serialize>(
    value: &HashMap<Key, V>,
    serializer: S,
) -> Result<S::Ok, S::Error>
where
    S: Serializer,
{
    use serde::ser::SerializeMap;

    // Borrow every entry, then order the borrowed pairs by key name.
    let mut entries = value.iter().collect::<Vec<_>>();
    entries.sort_by(|lhs, rhs| lhs.0.name.cmp(&rhs.0.name));

    // Emit the entries as a map, in sorted order.
    let mut state = serializer.serialize_map(Some(entries.len()))?;
    entries
        .into_iter()
        .try_for_each(|(key, val)| state.serialize_entry(key, val))?;
    state.end()
}
/// Serializes a `HashSet<Value>` as a sequence with its elements in sorted
/// order.
///
/// `HashSet` iteration order is unspecified; sorting the elements (which
/// relies on `Value` implementing `Ord`) makes the serialized output
/// deterministic.
pub fn ordered_set<S>(value: &HashSet<Value>, serializer: S) -> Result<S::Ok, S::Error>
where
    S: Serializer,
{
    let mut members = value.iter().collect::<Vec<&Value>>();
    members.sort();

    let mut seq = serializer.serialize_seq(Some(members.len()))?;
    members
        .into_iter()
        .try_for_each(|member| seq.serialize_element(member))?;
    seq.end()
}

View file

@ -2,8 +2,8 @@ use std::{fmt, iter};
use anyhow::{anyhow, Result};
use plonky2::field::types::Field;
// use schemars::JsonSchema;
// use serde::{Deserialize, Serialize};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use strum_macros::FromRepr;
use crate::middleware::{
@ -20,7 +20,7 @@ pub const STATEMENT_ARG_F_LEN: usize = 8;
pub const OPERATION_ARG_F_LEN: usize = 1;
pub const OPERATION_AUX_F_LEN: usize = 1;
#[derive(Clone, Copy, Debug, FromRepr, PartialEq, Eq, Hash)]
#[derive(Clone, Copy, Debug, FromRepr, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
pub enum NativePredicate {
None = 0,
ValueOf = 1,
@ -49,7 +49,7 @@ impl ToFields for NativePredicate {
}
}
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
pub enum WildcardValue {
PodId(PodId),
Key(Key),
@ -83,7 +83,8 @@ impl ToFields for WildcardValue {
}
/// Type encapsulating statements with their associated arguments.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
#[serde(tag = "predicate", content = "args")]
pub enum Statement {
None,
ValueOf(AnchoredKey, Value),
@ -275,7 +276,7 @@ impl fmt::Display for Statement {
}
/// Statement argument type. Useful for statement decompositions.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum StatementArg {
None,
Literal(Value),