Support "records" in Podlang (#507)

* Support both integer and string keys in anchored keys

* Podlang parser support for records

* Validate record usage in Podlang

* Lower records to middleware

* Cross-module record imports

* Tidying

* Record entry name literal

* More tidying

* More tests, make sure qualified record literals are supported

* Use snake-case for record entry names

* Review feedback
This commit is contained in:
Rob Knight 2026-05-06 06:21:22 -07:00 committed by GitHub
parent 5e3ac9a101
commit e9e3241263
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 2020 additions and 198 deletions

View file

@ -13,7 +13,7 @@ use hex::ToHex;
use crate::{
lang::{frontend_ast::*, Module},
middleware::{CustomPredicateBatch, Hash, NativePredicate},
middleware::{CustomPredicateBatch, Hash, NativePredicate, Params},
};
/// A validated AST document with symbol table and diagnostics
@ -51,6 +51,55 @@ pub struct SymbolTable {
pub wildcard_scopes: HashMap<String, WildcardScope>,
/// Imported modules (bound name → Module reference)
pub imported_modules: HashMap<String, Arc<Module>>,
/// Records visible in this scope (local declarations + imports).
pub records: HashMap<String, RecordSchema>,
}
/// Resolved record schema: ordered entries plus a name→index lookup, with
/// provenance for diagnostics. Lowering uses `entry_index` to translate
/// dot-access like `r.foo` into the integer key for an `AnchoredKey`.
#[derive(Debug, Clone)]
pub struct RecordSchema {
/// Entry names in order; an entry's position here is its integer index.
pub entries: Vec<String>,
/// Maps each entry name to its position in `entries`.
pub entry_index: HashMap<String, usize>,
/// Whether the record was declared locally or came from an imported module.
pub source: RecordSource,
/// Span kept for diagnostics: the record name's span for a local
/// declaration, the `use` statement's span for an imported record.
pub source_span: Option<Span>,
}
impl RecordSchema {
/// Build a schema from already-deduplicated entries. Callers that need
/// to surface a per-entry span on duplicates (e.g. local declarations)
/// should detect duplicates themselves before calling this.
pub fn from_entries(
entries: Vec<String>,
source: RecordSource,
source_span: Option<Span>,
) -> Self {
let entry_index = entries
.iter()
.enumerate()
.map(|(i, e)| (e.clone(), i))
.collect();
Self {
entries,
entry_index,
source,
source_span,
}
}
}
/// Provenance of a record schema stored in `SymbolTable.records`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RecordSource {
/// Declared by a record definition in the document being validated.
Local,
/// Brought in from an imported module; `module` holds the alias that the
/// `use` statement bound the module to.
Imported { module: String },
}
/// Compose the `SymbolTable.records` key for a record that was imported via
/// `use module ... as alias`.
///
/// The result is `alias`, a `::` separator, then `name`, which mirrors the
/// `alias::Name` form used for `TypeRef::Qualified`.
pub fn qualified_record_key(alias: &str, name: &str) -> String {
    [alias, "::", name].concat()
}
/// Information about a predicate
@ -96,6 +145,9 @@ pub struct WildcardInfo {
pub index: usize,
pub is_public: bool,
pub source_span: Option<Span>,
/// Record type tag for typed args (`name TypeName` syntax). The name
/// references an entry in `SymbolTable.records`.
pub record_type: Option<String>,
}
/// Diagnostic message (warning or info)
@ -127,14 +179,16 @@ pub enum ParseMode {
pub fn validate(
document: Document,
available_modules: &HashMap<Hash, Arc<Module>>,
params: &Params,
mode: ParseMode,
) -> Result<ValidatedAST, ValidationError> {
let validator = Validator::new(available_modules, mode);
let validator = Validator::new(available_modules, params, mode);
validator.validate(document)
}
struct Validator {
available_modules: HashMap<Hash, Arc<Module>>,
params: Params,
symbols: SymbolTable,
diagnostics: Vec<Diagnostic>,
custom_predicate_count: usize,
@ -142,13 +196,19 @@ struct Validator {
}
impl Validator {
fn new(available_modules: &HashMap<Hash, Arc<Module>>, mode: ParseMode) -> Self {
fn new(
available_modules: &HashMap<Hash, Arc<Module>>,
params: &Params,
mode: ParseMode,
) -> Self {
Self {
available_modules: available_modules.clone(),
params: params.clone(),
symbols: SymbolTable {
predicates: HashMap::new(),
wildcard_scopes: HashMap::new(),
imported_modules: HashMap::new(),
records: HashMap::new(),
},
diagnostics: Vec::new(),
custom_predicate_count: 0,
@ -181,6 +241,13 @@ impl Validator {
}
}
// Records before predicates so typed-arg resolution can find them.
for item in &document.items {
if let DocumentItem::RecordDef(record_def) = item {
self.process_record_def(record_def)?;
}
}
// Check mode constraints for predicate definitions
let mut has_predicates = false;
for item in &document.items {
@ -214,7 +281,7 @@ impl Validator {
}
}
// Enforce that modules have predicates and requests have a REQUEST block
// Enforce that modules have predicates and requests have a REQUEST block.
match self.mode {
ParseMode::Module if !has_predicates => {
return Err(ValidationError::NoPredicatesInModule);
@ -244,6 +311,22 @@ impl Validator {
span: use_stmt.span,
})?;
// Flatten the imported module's locally-declared records into the
// symbol table under qualified keys (`alias::Name`). No transitive
// re-export — `Module.records` only carries local declarations.
for (record_name, entries) in &module.records {
self.symbols.records.insert(
qualified_record_key(alias, record_name),
RecordSchema::from_entries(
entries.clone(),
RecordSource::Imported {
module: alias.clone(),
},
use_stmt.span,
),
);
}
// Store the module keyed by alias for later qualified name resolution
self.symbols
.imported_modules
@ -252,6 +335,24 @@ impl Validator {
Ok(())
}
/// Resolve the record type tag of a typed arg to its `SymbolTable.records`
/// key.
///
/// Returns `Ok(None)` when the arg carries no `type_name`. Otherwise the
/// key is whatever `symbol_table_key()` yields for the tag: the bare type
/// name for locals, `"alias::Name"` for qualified imports.
///
/// # Errors
/// `ValidationError::UnknownRecord` when no record is registered under the
/// resolved key.
fn resolve_typed_arg(&self, arg: &TypedArg) -> Result<Option<String>, ValidationError> {
    match &arg.type_name {
        None => Ok(None),
        Some(type_ref) => {
            let key = type_ref.symbol_table_key();
            if self.symbols.records.contains_key(&key) {
                Ok(Some(key))
            } else {
                Err(ValidationError::UnknownRecord {
                    name: key,
                    span: type_ref.span(),
                })
            }
        }
    }
}
fn process_use_intro_statement(
&mut self,
use_stmt: &UseIntroStatement,
@ -283,6 +384,48 @@ impl Validator {
Ok(())
}
/// Register a locally declared record in `SymbolTable.records`.
///
/// Checks run in this order and the first failure is returned:
/// 1. `DuplicateRecord` if a record is already registered under this name;
/// 2. `RecordTooManyEntries` if the declaration has more entries than
///    `Params::max_record_entries()` allows;
/// 3. `DuplicateRecordEntry` for the first entry name that repeats.
///
/// On success the schema is stored with `RecordSource::Local` and the
/// record name's span.
fn process_record_def(&mut self, record_def: &RecordDef) -> Result<(), ValidationError> {
    let record_name = &record_def.name.name;

    if let Some(previous) = self.symbols.records.get(record_name) {
        return Err(ValidationError::DuplicateRecord {
            name: record_name.clone(),
            first_span: previous.source_span,
            second_span: record_def.name.span,
        });
    }

    let entry_count = record_def.entries.len();
    let limit = self.params.max_record_entries();
    if entry_count > limit {
        return Err(ValidationError::RecordTooManyEntries {
            name: record_name.clone(),
            count: entry_count,
            max: limit,
            span: record_def.span,
        });
    }

    // Collect the names in declaration order, bailing out on the first
    // name that was already seen so the error can carry that entry's span.
    let mut seen_names = HashSet::with_capacity(entry_count);
    let entry_names = record_def
        .entries
        .iter()
        .map(|entry| {
            if seen_names.insert(&entry.name) {
                Ok(entry.name.clone())
            } else {
                Err(ValidationError::DuplicateRecordEntry {
                    record: record_name.clone(),
                    entry: entry.name.clone(),
                    span: entry.span,
                })
            }
        })
        .collect::<Result<Vec<String>, ValidationError>>()?;

    self.symbols.records.insert(
        record_name.clone(),
        RecordSchema::from_entries(entry_names, RecordSource::Local, record_def.name.span),
    );

    Ok(())
}
fn process_custom_predicate_def(
&mut self,
pred_def: &CustomPredicateDef,
@ -318,12 +461,14 @@ impl Validator {
span: arg.span,
});
}
let record_type = self.resolve_typed_arg(arg)?;
wildcards.insert(
arg.name.clone(),
WildcardInfo {
index: wildcard_index,
is_public: true,
source_span: arg.span,
record_type,
},
);
wildcard_index += 1;
@ -339,12 +484,14 @@ impl Validator {
span: arg.span,
});
}
let record_type = self.resolve_typed_arg(arg)?;
wildcards.insert(
arg.name.clone(),
WildcardInfo {
index: wildcard_index,
is_public: false,
source_span: arg.span,
record_type,
},
);
wildcard_index += 1;
@ -443,10 +590,7 @@ impl Validator {
wildcard_context: Option<(&str, &WildcardScope)>,
) -> Result<(), ValidationError> {
let pred_name = stmt.predicate.predicate_name();
let pred_span = match &stmt.predicate {
PredicateRef::Local(id) => id.span,
PredicateRef::Qualified { predicate, .. } => predicate.span,
};
let pred_span = stmt.predicate.span();
let wc_names = match wildcard_context {
Some((_, wc_scope)) => wc_scope.wildcards.keys().collect(),
@ -547,12 +691,44 @@ impl Validator {
}
StatementTmplArg::AnchoredKey(ak) => {
if let Some((pred_name, scope)) = wildcard_context {
if !scope.wildcards.contains_key(&ak.root.name) {
let Some(wc_info) = scope.wildcards.get(&ak.root.name) else {
return Err(ValidationError::UndefinedWildcard {
name: ak.root.name.clone(),
pred_name: pred_name.to_string(),
span: ak.root.span,
});
};
// Records are integer-keyed, so string-key access on
// a typed wildcard is dead code at proof time. Reject
// dot access for unknown entries and bracket access
// outright; require `r.entry` for record-shaped data.
if let Some(record_name) = &wc_info.record_type {
match &ak.key {
AnchoredKeyPath::Dot(entry) => {
let schema =
self.symbols.records.get(record_name).expect(
"record_type was resolved at predicate-def time",
);
if !schema.entry_index.contains_key(&entry.name) {
return Err(ValidationError::UnknownRecordEntry {
record: record_name.clone(),
entry: entry.name.clone(),
span: entry.span,
});
}
}
AnchoredKeyPath::Bracket(_) => {
return Err(ValidationError::BracketAccessOnTypedWildcard {
wildcard: ak.root.name.clone(),
record: record_name.clone(),
span: ak.span,
});
}
AnchoredKeyPath::Index(_) => unreachable!(
"AnchoredKeyPath::Index is introduced during lowering; \
it cannot appear in the parsed AST that validation sees"
),
}
}
}
}
@ -638,6 +814,51 @@ impl Validator {
}
Ok(())
}
LiteralValue::Record(r) => {
let key = r.name.symbol_table_key();
let Some(schema) = self.symbols.records.get(&key) else {
return Err(ValidationError::UnknownRecord {
name: key,
span: r.name.span(),
});
};
let mut seen: HashSet<&String> = HashSet::new();
for entry in &r.entries {
if !schema.entry_index.contains_key(&entry.name.name) {
return Err(ValidationError::UnknownRecordEntry {
record: key.clone(),
entry: entry.name.name.clone(),
span: entry.name.span,
});
}
if !seen.insert(&entry.name.name) {
return Err(ValidationError::DuplicateLiteralRecordEntry {
record: key.clone(),
entry: entry.name.name.clone(),
span: entry.name.span,
});
}
self.validate_literal_value(&entry.value)?;
}
Ok(())
}
LiteralValue::RecordEntryIndex { record, entry } => {
let key = record.symbol_table_key();
let Some(schema) = self.symbols.records.get(&key) else {
return Err(ValidationError::UnknownRecord {
name: key,
span: record.span(),
});
};
if !schema.entry_index.contains_key(&entry.name) {
return Err(ValidationError::UnknownRecordEntry {
record: key,
entry: entry.name.clone(),
span: entry.span,
});
}
Ok(())
}
_ => Ok(()),
}
}
@ -659,7 +880,7 @@ mod tests {
) -> Result<ValidatedAST, ValidationError> {
let parsed = parse_podlang(input).expect("Failed to parse");
let document = parse_document(parsed.into_iter().next().unwrap()).expect("Failed to parse");
validate(document, modules, ParseMode::Module)
validate(document, modules, &Params::default(), ParseMode::Module)
}
fn parse_and_validate_request(
@ -668,7 +889,7 @@ mod tests {
) -> Result<ValidatedAST, ValidationError> {
let parsed = parse_podlang(input).expect("Failed to parse");
let document = parse_document(parsed.into_iter().next().unwrap()).expect("Failed to parse");
validate(document, modules, ParseMode::Request)
validate(document, modules, &Params::default(), ParseMode::Request)
}
#[test]
@ -846,8 +1067,9 @@ mod tests {
span: None,
},
args: ArgSection {
public_args: vec![Identifier {
public_args: vec![TypedArg {
name: "A".to_string(),
type_name: None,
span: None,
}],
private_args: None,
@ -858,7 +1080,12 @@ mod tests {
span: None,
})],
};
let result = validate(document, &HashMap::new(), ParseMode::Module);
let result = validate(
document,
&HashMap::new(),
&Params::default(),
ParseMode::Module,
);
assert!(matches!(
result,
Err(ValidationError::EmptyStatementList { .. })
@ -936,4 +1163,247 @@ mod tests {
let result = parse_and_validate_request(input, &HashMap::new());
assert!(result.is_ok());
}
// ----- Records ----------------------------------------------------------
#[test]
fn test_record_decl_accepted() {
    // A well-formed local record ends up in the symbol table with its
    // entries in declaration order and `Local` provenance.
    let source = r#"
record ProcInputs = (foo, bar, baz)
my_pred(A) = AND(Equal(A["x"], 1))
"#;
    let no_modules = HashMap::new();
    let ast = parse_and_validate_module(source, &no_modules).unwrap();
    let proc_inputs = ast.symbols.records.get("ProcInputs").unwrap();
    assert_eq!(proc_inputs.entries, vec!["foo", "bar", "baz"]);
    assert_eq!(proc_inputs.source, RecordSource::Local);
}
#[test]
fn test_records_only_module_rejected() {
    // Declaring records alone does not make a module: at least one
    // predicate is still required.
    let outcome = parse_and_validate_module(r#"record R = (x)"#, &HashMap::new());
    assert!(matches!(
        outcome,
        Err(ValidationError::NoPredicatesInModule)
    ));
}
#[test]
fn test_duplicate_record() {
    // Two declarations with the same record name must be rejected.
    let source = r#"
record R = (foo)
record R = (bar)
"#;
    let no_modules = HashMap::new();
    assert!(matches!(
        parse_and_validate_module(source, &no_modules),
        Err(ValidationError::DuplicateRecord { .. })
    ));
}
#[test]
fn test_duplicate_entry_in_record() {
    // Repeating an entry name inside one record declaration is an error
    // that names both the record and the offending entry.
    let source = r#"
record R = (foo, foo)
my_pred(A) = AND(Equal(A["x"], 1))
"#;
    let outcome = parse_and_validate_module(source, &HashMap::new());
    let is_expected_error = matches!(
        outcome,
        Err(ValidationError::DuplicateRecordEntry { record, entry, .. })
            if record == "R" && entry == "foo"
    );
    assert!(is_expected_error);
}
#[test]
fn test_record_entry_cap() {
    // Move the depth parameter away from its default so the cap under test
    // comes from `params` rather than a hard-coded value. One test then
    // pins three things: the parameter is threaded through, exactly `cap`
    // entries are accepted, and `cap + 1` entries are rejected.
    let mut params = Params::default();
    params.containers.max_depth_small -= 1;
    let cap = params.max_record_entries();

    // Validate a module whose single record declares `n` entries f0..f{n-1}.
    let run_with_entry_count = |n: usize| {
        let names = (0..n)
            .map(|i| format!("f{i}"))
            .collect::<Vec<String>>()
            .join(", ");
        let input = format!(
            "record Big = ({})\nmy_pred(A) = AND(Equal(A[\"x\"], 1))",
            names
        );
        let mut pairs = parse_podlang(&input)
            .expect("Failed to parse")
            .into_iter();
        let document = parse_document(pairs.next().unwrap()).expect("Failed to parse");
        validate(document, &HashMap::new(), &params, ParseMode::Module)
    };

    assert!(run_with_entry_count(cap).is_ok());

    let too_many = cap + 1;
    let rejected = run_with_entry_count(too_many);
    assert!(matches!(
        rejected,
        Err(ValidationError::RecordTooManyEntries { count, max, .. })
            if count == too_many && max == cap
    ));
}
#[test]
fn test_typed_arg_resolves_known_record() {
    // A typed arg whose tag names a declared record keeps that record's
    // key on the wildcard.
    let source = r#"
record R = (foo, bar)
my_pred(in R) = AND(Equal(in.foo, in.bar))
"#;
    let outcome = parse_and_validate_module(source, &HashMap::new());
    assert!(outcome.is_ok());

    let ast = outcome.unwrap();
    let my_pred_scope = ast.symbols.wildcard_scopes.get("my_pred").unwrap();
    let tag = my_pred_scope.wildcards["in"].record_type.as_deref();
    assert_eq!(tag, Some("R"));
}
#[test]
fn test_typed_arg_unknown_record_rejected() {
    // A type tag that names no declared record is reported by name.
    let source = r#"
my_pred(in NonExistent) = AND(Equal(in.foo, 1))
"#;
    let no_modules = HashMap::new();
    assert!(matches!(
        parse_and_validate_module(source, &no_modules),
        Err(ValidationError::UnknownRecord { name, .. }) if name == "NonExistent"
    ));
}
#[test]
fn test_dot_access_unknown_entry_rejected() {
    // Dot access on a record-typed wildcard must name one of the record's
    // declared entries.
    let source = r#"
record R = (foo, bar)
my_pred(in R) = AND(Equal(in.quux, 1))
"#;
    let outcome = parse_and_validate_module(source, &HashMap::new());
    let is_expected_error = matches!(
        outcome,
        Err(ValidationError::UnknownRecordEntry { record, entry, .. })
            if record == "R" && entry == "quux"
    );
    assert!(is_expected_error);
}
#[test]
fn test_dot_access_on_untyped_wildcard_unchecked() {
    // Without a record type tag, `r.foo` keeps the existing POD string-key
    // behavior: there is no schema to check `foo` against.
    let source = r#"
my_pred(r) = AND(Equal(r.foo, 1))
"#;
    let outcome = parse_and_validate_module(source, &HashMap::new());
    assert!(outcome.is_ok());
}
#[test]
fn test_bracket_access_on_typed_wildcard_rejected() {
    // Records use integer keys, so a string-key lookup on a record-typed
    // wildcard could never resolve at proof time. Validation rejects it and
    // the user has to write `.entry` instead.
    let source = r#"
record R = (foo)
my_pred(r R) = AND(Equal(r["foo"], 1))
"#;
    let no_modules = HashMap::new();
    let outcome = parse_and_validate_module(source, &no_modules);
    assert!(matches!(
        outcome,
        Err(ValidationError::BracketAccessOnTypedWildcard { wildcard, record, .. })
            if wildcard == "r" && record == "R"
    ));
}
#[test]
fn test_record_literal_unknown_record() {
    // A record literal must refer to a declared record.
    let source = r#"
my_pred(A) = AND(Equal(A["x"], NotARecord(f: 1)))
"#;
    let no_modules = HashMap::new();
    assert!(matches!(
        parse_and_validate_module(source, &no_modules),
        Err(ValidationError::UnknownRecord { name, .. }) if name == "NotARecord"
    ));
}
#[test]
fn test_record_literal_unknown_entry() {
    // Every entry named in a record literal must exist in the record's
    // schema.
    let source = r#"
record R = (foo, bar)
my_pred(A) = AND(Equal(A["x"], R(foo: 1, quux: 2)))
"#;
    let outcome = parse_and_validate_module(source, &HashMap::new());
    let is_expected_error = matches!(
        outcome,
        Err(ValidationError::UnknownRecordEntry { record, entry, .. })
            if record == "R" && entry == "quux"
    );
    assert!(is_expected_error);
}
#[test]
fn test_record_literal_nested() {
    // Literal validation recurses into entry values, so an unknown entry
    // on the inner literal is reported against the inner record.
    let source = r#"
record Outer = (inner)
record Inner = (x, y)
my_pred(A) = AND(Equal(A["x"], Outer(inner: Inner(x: 1, z: 2))))
"#;
    let no_modules = HashMap::new();
    let outcome = parse_and_validate_module(source, &no_modules);
    assert!(matches!(
        outcome,
        Err(ValidationError::UnknownRecordEntry { record, entry, .. })
            if record == "Inner" && entry == "z"
    ));
}
#[test]
fn test_record_literal_duplicate_entry() {
    // Assigning the same entry twice in one record literal is rejected.
    let source = r#"
record R = (foo, bar)
my_pred(A) = AND(Equal(A["x"], R(foo: 1, foo: 2)))
"#;
    let outcome = parse_and_validate_module(source, &HashMap::new());
    let is_expected_error = matches!(
        outcome,
        Err(ValidationError::DuplicateLiteralRecordEntry { record, entry, .. })
            if record == "R" && entry == "foo"
    );
    assert!(is_expected_error);
}
#[test]
fn test_record_entry_index_resolves() {
    // `R::bar` passes validation, and the schema places `bar` at index 1,
    // which is the integer the literal will lower to.
    let source = r#"
record R = (foo, bar)
my_pred(A) = AND(Contains(A, R::bar, 7))
"#;
    let no_modules = HashMap::new();
    let ast = parse_and_validate_module(source, &no_modules).unwrap();
    let r_schema = ast.symbols.records.get("R").unwrap();
    assert_eq!(r_schema.entry_index["bar"], 1);
}
#[test]
fn test_record_entry_index_unknown_record() {
    // An entry-index literal must refer to a declared record.
    let source = r#"
my_pred(A) = AND(Contains(A, NotARecord::foo, 7))
"#;
    let no_modules = HashMap::new();
    assert!(matches!(
        parse_and_validate_module(source, &no_modules),
        Err(ValidationError::UnknownRecord { name, .. }) if name == "NotARecord"
    ));
}
#[test]
fn test_record_entry_index_unknown_entry() {
    // An entry-index literal on a known record must name one of that
    // record's entries.
    let source = r#"
record R = (foo, bar)
my_pred(A) = AND(Contains(A, R::quux, 7))
"#;
    let outcome = parse_and_validate_module(source, &HashMap::new());
    let is_expected_error = matches!(
        outcome,
        Err(ValidationError::UnknownRecordEntry { record, entry, .. })
            if record == "R" && entry == "quux"
    );
    assert!(is_expected_error);
}
}