Fix nondeterminism in splitting (#482)
This commit is contained in:
parent
cdf227e353
commit
e950661090
1 changed file with 69 additions and 12 deletions
|
|
@ -15,7 +15,10 @@
|
||||||
//! We use a greedy algorithm to order the statements in a predicate to minimize
|
//! We use a greedy algorithm to order the statements in a predicate to minimize
|
||||||
//! the number of live wildcards at split boundaries.
|
//! the number of live wildcards at split boundaries.
|
||||||
|
|
||||||
use std::collections::{HashMap, HashSet};
|
use std::{
|
||||||
|
cmp::Reverse,
|
||||||
|
collections::{HashMap, HashSet},
|
||||||
|
};
|
||||||
|
|
||||||
// SplittingError is now defined in error.rs
|
// SplittingError is now defined in error.rs
|
||||||
pub use crate::lang::error::SplittingError;
|
pub use crate::lang::error::SplittingError;
|
||||||
|
|
@ -253,6 +256,28 @@ fn compute_tie_breakers(
|
||||||
(simplicity, closes_count, -(fanout as i32))
|
(simplicity, closes_count, -(fanout as i32))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builds the ordering key used to pick the next statement to schedule.
///
/// The key is a tuple and therefore compares lexicographically:
/// 1. the primary heuristic score from `score_statement`,
/// 2. the secondary metrics from `compute_tie_breakers`,
/// 3. `Reverse(idx)`, so that among otherwise-equal candidates the one with
///    the smaller original index yields the larger key.
///
/// `find_best_next_statement` feeds this key to `max_by_key` while iterating
/// `remaining`, which is a `HashSet`; the third component makes every key
/// distinct, so the winner does not depend on the set's iteration order.
///
/// # Panics
///
/// Panics if `idx` is out of bounds for `statements` (direct slice indexing).
fn statement_selection_key(
|
||||||
|
idx: usize,
|
||||||
|
statements: &[StatementTmpl],
|
||||||
|
active_wildcards: &HashSet<String>,
|
||||||
|
remaining: &HashSet<usize>,
|
||||||
|
approaching_split: bool,
|
||||||
|
) -> (i32, (usize, usize, i32), Reverse<usize>) {
|
||||||
|
// Primary criterion: heuristic score of this candidate in the current state.
let primary_score = score_statement(
|
||||||
|
&statements[idx],
|
||||||
|
active_wildcards,
|
||||||
|
statements,
|
||||||
|
remaining,
|
||||||
|
approaching_split,
|
||||||
|
);
|
||||||
|
// Secondary criterion: only consulted when primary scores are equal.
let tie_breakers =
|
||||||
|
compute_tie_breakers(&statements[idx], active_wildcards, statements, remaining);
|
||||||
|

||||||
|
// Final deterministic tie-breaker: prefer smaller original indices.
|
||||||
|
// This avoids hash-iteration-dependent selection when scores are equal.
|
||||||
|
(primary_score, tie_breakers, Reverse(idx))
|
||||||
|
}
|
||||||
|
|
||||||
/// Find the best next statement to add based on scoring heuristic
|
/// Find the best next statement to add based on scoring heuristic
|
||||||
fn find_best_next_statement(
|
fn find_best_next_statement(
|
||||||
statements: &[StatementTmpl],
|
statements: &[StatementTmpl],
|
||||||
|
|
@ -268,16 +293,13 @@ fn find_best_next_statement(
|
||||||
remaining
|
remaining
|
||||||
.iter()
|
.iter()
|
||||||
.max_by_key(|&&idx| {
|
.max_by_key(|&&idx| {
|
||||||
let primary_score = score_statement(
|
statement_selection_key(
|
||||||
&statements[idx],
|
idx,
|
||||||
active_wildcards,
|
|
||||||
statements,
|
statements,
|
||||||
|
active_wildcards,
|
||||||
remaining,
|
remaining,
|
||||||
approaching_split,
|
approaching_split,
|
||||||
);
|
)
|
||||||
let tie_breakers =
|
|
||||||
compute_tie_breakers(&statements[idx], active_wildcards, statements, remaining);
|
|
||||||
(primary_score, tie_breakers)
|
|
||||||
})
|
})
|
||||||
.copied()
|
.copied()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
|
|
@ -362,10 +384,14 @@ fn generate_refactor_suggestion(
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Normalize wildcard order so diagnostics are deterministic.
|
||||||
|
let mut sorted_crossing_wildcards = crossing_wildcards.to_vec();
|
||||||
|
sorted_crossing_wildcards.sort();
|
||||||
|
|
||||||
// Analyze the span of each crossing wildcard
|
// Analyze the span of each crossing wildcard
|
||||||
let mut wildcard_spans: Vec<(String, usize, usize, usize)> = Vec::new();
|
let mut wildcard_spans: Vec<(String, usize, usize, usize)> = Vec::new();
|
||||||
|
|
||||||
for wildcard in crossing_wildcards {
|
for wildcard in &sorted_crossing_wildcards {
|
||||||
let mut first_use = None;
|
let mut first_use = None;
|
||||||
let mut last_use = None;
|
let mut last_use = None;
|
||||||
|
|
||||||
|
|
@ -401,9 +427,9 @@ fn generate_refactor_suggestion(
|
||||||
}
|
}
|
||||||
|
|
||||||
// If multiple wildcards cross the boundary, suggest grouping
|
// If multiple wildcards cross the boundary, suggest grouping
|
||||||
if crossing_wildcards.len() > 1 {
|
if sorted_crossing_wildcards.len() > 1 {
|
||||||
return Some(RefactorSuggestion::GroupWildcardUsages {
|
return Some(RefactorSuggestion::GroupWildcardUsages {
|
||||||
wildcards: crossing_wildcards.to_vec(),
|
wildcards: sorted_crossing_wildcards,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -459,11 +485,12 @@ fn split_into_chain(
|
||||||
// Check: Can we fit promoted wildcards in public args?
|
// Check: Can we fit promoted wildcards in public args?
|
||||||
// Need to account for possible overlap between incoming_public and live_at_boundary
|
// Need to account for possible overlap between incoming_public and live_at_boundary
|
||||||
let incoming_set: HashSet<_> = incoming_public.iter().cloned().collect();
|
let incoming_set: HashSet<_> = incoming_public.iter().cloned().collect();
|
||||||
let new_promotions: Vec<_> = live_at_boundary
|
let mut new_promotions: Vec<_> = live_at_boundary
|
||||||
.iter()
|
.iter()
|
||||||
.filter(|w| !incoming_set.contains(*w))
|
.filter(|w| !incoming_set.contains(*w))
|
||||||
.cloned()
|
.cloned()
|
||||||
.collect();
|
.collect();
|
||||||
|
new_promotions.sort();
|
||||||
let total_public = incoming_public.len() + new_promotions.len();
|
let total_public = incoming_public.len() + new_promotions.len();
|
||||||
if total_public > Params::max_statement_args() {
|
if total_public > Params::max_statement_args() {
|
||||||
let context = crate::lang::error::SplitContext {
|
let context = crate::lang::error::SplitContext {
|
||||||
|
|
@ -933,6 +960,36 @@ mod tests {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Regression test for deterministic statement selection: when two candidates
/// tie on both the primary score and the secondary metrics, the one with the
/// lower original index must be chosen.
#[test]
|
||||||
|
fn test_statement_selection_prefers_lower_index_on_tie() {
|
||||||
|
// Two structurally symmetric statements produce identical heuristic scores.
|
||||||
|
// Determinism comes from the final index-based tie breaker.
|
||||||
|
let input = r#"
|
||||||
|
tie_break(A, B) = AND (
|
||||||
|
Equal(A["x"], B["x"])
|
||||||
|
|
||||||
|
Equal(A["y"], B["y"])
|
||||||
|
)
|
||||||
|
"#;
|
||||||
|

||||||
|
// NOTE(review): `parse_predicate` is not visible in this view; presumably a
// test helper defined elsewhere in this module.
let pred = parse_predicate(input);
|
||||||
|
let statements = pred.statements;
|
||||||
|
// Both statements are still unscheduled and no wildcard is active yet.
let remaining: HashSet<usize> = [0, 1].into_iter().collect();
|
||||||
|
let active_wildcards = HashSet::new();
|
||||||
|

||||||
|
let key0 = statement_selection_key(0, &statements, &active_wildcards, &remaining, false);
|
||||||
|
let key1 = statement_selection_key(1, &statements, &active_wildcards, &remaining, false);
|
||||||
|

||||||
|
// Precondition: the first two key components must really tie, otherwise the
// index component is never consulted and the test proves nothing.
assert_eq!(key0.0, key1.0, "Primary heuristic score should tie");
|
||||||
|
assert_eq!(key0.1, key1.1, "Secondary tie-breaker metrics should tie");
|
||||||
|
// With the first two components equal, ordering falls to `Reverse(idx)`:
// `Reverse(0) > Reverse(1)`, so the key for index 0 is the larger one.
assert!(
|
||||||
|
key0 > key1,
|
||||||
|
"Lower original index should win deterministic final tie-breaker"
|
||||||
|
);
|
||||||
|

||||||
|
// End-to-end check through the selection routine that consumes the key.
let selected = find_best_next_statement(&statements, &remaining, &active_wildcards, 0);
|
||||||
|
assert_eq!(selected, 0);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_greedy_ordering_reduces_liveness() {
|
fn test_greedy_ordering_reduces_liveness() {
|
||||||
// This test verifies that our greedy ordering algorithm reduces wildcard liveness
|
// This test verifies that our greedy ordering algorithm reduces wildcard liveness
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue