pod2/src/lang/grammar.pest
Rob Knight acab26e5c1
Remove batch splitting system (#475)
* First pass at removing batch splitting

* Refactor to separate module loading from request parsing

* Consolidate module functionality

* Tidy up comments

* Use array of modules instead of HashMap

* Formatting

* Use module hashes when importing modules
2026-02-09 10:31:47 +01:00

123 lines
4.5 KiB
Text

// Grammar for the "Podlang" language. Used for describing POD2 Custom
// Predicates and Proof Requests.
// Note: Silent rules (`_`) generate no tokens. WHITESPACE and COMMENT are
// special names known to Pest. If defined they are implicitly allowed between
// any other rules.
// Implicit whitespace (silent rule): a run of one or more spaces, tabs,
// and/or line breaks.
WHITESPACE = _{
    (" " | "\t" | NEWLINE)+
}
// Implicit comments (silent rule), in one of two forms:
//   - a `//` comment, running up to (but not consuming) the next line break;
//   - a `/* ... */` block comment, ending at the first `*/`.
COMMENT = _{
    "//" ~ (!NEWLINE ~ ANY)*
  | "/*" ~ (!"*/" ~ ANY)* ~ "*/"
}
// Words that may not be used as an identifier on their own.
reserved_identifier = { "private" | "true" | "false" }
// Identifiers (predicate names, wildcard names).
// Must start with an ASCII letter or `_`, followed by any number of ASCII
// letters, digits, or `_`.
// A reserved word is rejected only when it is the *whole* identifier: the
// inner `!(ASCII_ALPHANUMERIC | "_")` makes the lookahead succeed only if the
// reserved word ends at a word boundary. Without it the check is a prefix
// test, and names such as `private_key` or `trueish` would be refused.
identifier = @{
    !(reserved_identifier ~ !(ASCII_ALPHANUMERIC | "_"))
    ~ (ASCII_ALPHA | "_")
    ~ (ASCII_ALPHANUMERIC | "_")*
}
// Marker that introduces the private part of an argument section.
private_kw = {
    "private:"
}
// Argument section: the public names, optionally followed by
// `, private:` and the private names.
arg_section = {
    public_arg_list
    ~ ("," ~ private_kw ~ private_arg_list)?
}
// Both lists are one or more comma-separated identifiers.
public_arg_list  = { identifier ~ ("," ~ identifier)* }
private_arg_list = { identifier ~ ("," ~ identifier)* }
// Entry rule: the complete input is any sequence (possibly empty) of module
// imports, intro imports, custom predicate definitions, and requests.
// Alternatives are tried in the order listed.
document = {
    SOI
    ~ (
        use_module_statement
      | use_intro_statement
      | custom_predicate_def
      | request_def
    )*
    ~ EOI
}
// use module <hash_hex> as <identifier>
use_module_statement = {
    "use" ~ "module" ~ hash_hex
    ~ "as" ~ identifier
}
// use intro <identifier>(<optional args>) from <intro_predicate_ref>
use_intro_statement = {
    "use" ~ "intro" ~ identifier
    ~ "(" ~ use_intro_arg_list? ~ ")"
    ~ "from" ~ intro_predicate_ref
}
// One or more comma-separated identifiers.
use_intro_arg_list = {
    identifier ~ ("," ~ identifier)*
}
// An intro predicate is referenced by a `hash_hex` value.
intro_predicate_ref = {
    hash_hex
}
// REQUEST( ... ): the parenthesised statement list may be omitted entirely.
request_def = {
    "REQUEST"
    ~ "(" ~ statement_list? ~ ")"
}
// Keyword placed in front of a custom predicate's statement block.
conjunction_type = {
    "AND" | "OR"
}
// Custom predicate definition:
//   name(arg_section) = AND|OR(statement_list)
// Both the argument section and the statement list are required.
custom_predicate_def = {
    identifier ~ "(" ~ arg_section ~ ")"
    ~ "=" ~ conjunction_type
    ~ "(" ~ statement_list ~ ")"
}
// One or more statements written back to back.
statement_list = {
    statement+
}
// A single statement argument. Alternatives are tried in this order:
// literal value, anchored key, bare identifier.
statement_arg = {
    literal_value
  | anchored_key
  | identifier
}
// One or more comma-separated statement arguments.
statement_arg_list = {
    statement_arg ~ ("," ~ statement_arg)*
}
// Predicate reference: qualified (module::predicate), tried first, or
// local (predicate).
predicate_ref = {
    qualified_predicate_ref
  | identifier
}
qualified_predicate_ref = {
    identifier ~ "::" ~ identifier
}
// predicate_ref( ... ) with an optional argument list.
statement = {
    predicate_ref
    ~ "(" ~ statement_arg_list? ~ ")"
}
// Anchored key, in one of two spellings:
//   Var["key_literal"]   (bracketed string literal)
//   Var.key_identifier   (dotted identifier)
anchored_key = {
    identifier ~ "[" ~ literal_string ~ "]"
  | identifier ~ "." ~ identifier
}
// Any literal value. Alternatives are tried top to bottom and the first one
// that matches wins, so the order below is significant (e.g. string is
// listed before int).
literal_value = {
    literal_public_key
  | literal_secret_key
  | literal_dict
  | literal_set
  | literal_array
  | literal_bool
  | literal_raw
  | literal_string
  | literal_int
}
// Primitive literal types
// Integer: an optional leading minus sign, then one or more ASCII digits.
literal_int = @{
    "-"? ~ ASCII_DIGIT+
}
// Boolean literal: the whole word `true` or `false`.
// The trailing negative lookahead requires the keyword to end at a word
// boundary, so a longer word such as `true_x` is never partially consumed as
// a boolean. Every place a literal may appear already requires a delimiter
// after it, so this rejects no input that was previously accepted.
literal_bool = @{ ("true" | "false") ~ !(ASCII_ALPHANUMERIC | "_") }
// hash_hex: the prefix `0x` followed by 64 hex digits, i.e. 32 pairs,
// representing a 32-byte value in big-endian order.
hash_hex = @{
    "0x" ~ ASCII_HEX_DIGIT{64}
}
// Raw(<hash_hex>)
literal_raw = {
    "Raw" ~ "(" ~ hash_hex ~ ")"
}
// String literal, based on the JSON example in the pest book:
// https://pest.rs/book/examples/json.html
// Compound-atomic rule: the quotes and the content are matched without
// implicit whitespace, while `inner` is still emitted as a token.
literal_string = ${
    "\"" ~ inner ~ "\""
}
// The raw content between the quotes, as a single atomic token.
inner = @{
    char*
}
// One logical character of a string:
//   - any character other than a double quote or a backslash, or
//   - a simple escape: \" \\ \/ \b \f \n \r \t, or
//   - a unicode escape: \u followed by exactly four hex digits.
char = {
    !("\"" | "\\") ~ ANY
  | "\\" ~ ("\"" | "\\" | "/" | "b" | "f" | "n" | "r" | "t")
  | "\\" ~ "u" ~ ASCII_HEX_DIGIT{4}
}
// PublicKey(...)
// Accepted characters: digits 1-9 and ASCII letters, leaving out `0`, `O`,
// `I`, and `l`.
base58_char = {
    '1'..'9'
  | 'A'..'H' | 'J'..'N' | 'P'..'Z'
  | 'a'..'k' | 'm'..'z'
}
// One or more base58 characters, as a single atomic token.
base58_string = @{
    base58_char+
}
literal_public_key = {
    "PublicKey" ~ "(" ~ base58_string ~ ")"
}
// SecretKey(...)
// Accepted characters: ASCII letters, digits, `+`, `/`, and `=`.
// The position of `=` within the string is not checked here.
base64_char = {
    'a'..'z' | 'A'..'Z' | '0'..'9'
  | "+" | "/" | "="
}
// One or more base64 characters, as a single atomic token.
base64_string = @{
    base64_char+
}
literal_secret_key = {
    "SecretKey" ~ "(" ~ base64_string ~ ")"
}
// Container literals. Elements are themselves `literal_value`s, so
// containers may nest. Each container may be empty; trailing commas are not
// accepted.
// Array: [ v, v, ... ]
literal_array = {
    "[" ~ (literal_value ~ ("," ~ literal_value)*)? ~ "]"
}
// Set: #[ v, v, ... ]
literal_set = {
    "#[" ~ (literal_value ~ ("," ~ literal_value)*)? ~ "]"
}
// Dictionary: { "key": v, ... }
literal_dict = {
    "{" ~ (dict_pair ~ ("," ~ dict_pair)*)? ~ "}"
}
// A single dictionary entry: string-literal key, colon, value.
dict_pair = {
    literal_string ~ ":" ~ literal_value
}
// --- Rules for testing full input matching ---
// Each rule anchors one grammar rule between SOI and EOI, so it only succeeds
// when the entire input is consumed by that rule.
test_identifier           = { SOI ~ identifier           ~ EOI }
test_literal_int          = { SOI ~ literal_int          ~ EOI }
test_hash_hex             = { SOI ~ hash_hex             ~ EOI }
test_literal_raw          = { SOI ~ literal_raw          ~ EOI }
test_literal_value        = { SOI ~ literal_value        ~ EOI }
test_statement            = { SOI ~ statement            ~ EOI }
test_statement_arg        = { SOI ~ statement_arg        ~ EOI }
test_custom_predicate_def = { SOI ~ custom_predicate_def ~ EOI }