feat: different modules per task

2026-04-04 19:29:44 +02:00
parent b4f988018a
commit 4a5c1d3c4b
6 changed files with 1080 additions and 7 deletions
--- a/src/model/policy.rs
+++ b/src/model/policy.rs
@@ -0,0 +1,781 @@
+use anyhow::{bail, Context, Result};
+use serde::{de::DeserializeOwned, Deserialize, Serialize};
+use serde_json::Value;
+use toon_format::decode_default;
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "kebab-case", default)]
+pub struct RoutingPolicy {
+    pub policy: ModelRolePolicy,
+    pub routing: RoutingRules,
+    pub gate: PolicyGate,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "kebab-case", default)]
+pub struct ModelRolePolicy {
+    pub router: String,
+    #[serde(alias = "research")]
+    pub info: String,
+    #[serde(alias = "builder")]
+    pub build: String,
+    pub escalate_to: String,
+    pub conf_threshold: f64,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+#[serde(default)]
+pub struct RoutingRules {
+    pub decision_rules: Vec<String>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "kebab-case", default)]
+pub struct PolicyGate {
+    pub require_fields: Vec<String>,
+    pub fail_if_conf_less: f64,
+    pub if_conflict_or_missing: String,
+    pub if_test_failure: String,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PolicyDecision {
+    pub model: String,
+    pub reason: String,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "kebab-case", default)]
+pub struct HandoffEnvelope {
+    #[serde(alias = "id")]
+    pub task_id: Option<StringCell>,
+    #[serde(alias = "from", alias = "src")]
+    pub from_model: Option<StringCell>,
+    #[serde(alias = "to", alias = "dst")]
+    pub to_model: Option<StringCell>,
+    #[serde(alias = "obj")]
+    pub objective: Option<String>,
+    pub scope: Option<String>,
+    pub constraints: Option<Vec<String>>,
+    pub current_state: Option<String>,
+    #[serde(alias = "refs")]
+    pub artifact_refs: Option<Vec<String>>,
+    pub expected_output: Option<Value>,
+    pub acceptance_criteria: Option<Vec<String>>,
+    pub risks: Option<Vec<String>>,
+    pub fallback_triggers: Option<Vec<String>>,
+    pub confidence_threshold: Option<f64>,
+    #[serde(alias = "confidence")]
+    pub conf: Option<f64>,
+    pub assumptions_made: Option<Vec<String>>,
+    pub open_questions: Option<Vec<String>>,
+    pub failed_checks: Option<Vec<String>>,
+
+    #[serde(default)]
+    pub context: HandoffContext,
+    #[serde(default)]
+    pub out: HandoffOut,
+    #[serde(default)]
+    pub criteria: HandoffCriteria,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum StringCell {
+    Value(String),
+    Cell { value: String },
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+#[serde(rename_all = "kebab-case", default)]
+pub struct HandoffContext {
+    #[serde(default)]
+    pub state: Option<String>,
+    #[serde(default)]
+    pub refs: Vec<String>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+#[serde(rename_all = "kebab-case", default)]
+pub struct HandoffOut {
+    #[serde(default)]
+    pub alternatives: Option<u32>,
+    #[serde(default)]
+    pub recommended: bool,
+    #[serde(default)]
+    pub cite_sources: bool,
+    #[serde(default)]
+    pub format: Option<String>,
+    #[serde(default)]
+    pub diff_summary: Option<bool>,
+    #[serde(default)]
+    pub files: Vec<String>,
+    #[serde(default)]
+    pub tests: Vec<String>,
+    #[serde(default)]
+    pub risks: Option<String>,
+    #[serde(default)]
+    pub open_questions: Option<String>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+#[serde(rename_all = "kebab-case", default)]
+pub struct HandoffCriteria {
+    #[serde(default)]
+    pub must: Vec<String>,
+    #[serde(default)]
+    pub fail: Vec<String>,
+    #[serde(default)]
+    pub optional: Vec<String>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "kebab-case", default)]
+pub struct ReviewEnvelope {
+    pub from: StringCell,
+    pub to: StringCell,
+    pub review: ReviewPayload,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "kebab-case", default)]
+pub struct ReviewPayload {
+    #[serde(default)]
+    pub task: Option<String>,
+    #[serde(default)]
+    pub conf: Option<f64>,
+    #[serde(default)]
+    pub findings: Vec<String>,
+    #[serde(default)]
+    pub required: Vec<String>,
+    #[serde(default)]
+    pub gate: Option<String>,
+}
+
+impl Default for RoutingPolicy {
+    fn default() -> Self {
+        Self {
+            policy: ModelRolePolicy::default(),
+            routing: RoutingRules::default(),
+            gate: PolicyGate {
+                require_fields: vec![
+                    "task_id".to_string(),
+                    "from_model".to_string(),
+                    "to_model".to_string(),
+                    "objective".to_string(),
+                    "scope".to_string(),
+                    "constraints".to_string(),
+                    "current_state".to_string(),
+                    "artifact_refs".to_string(),
+                    "expected_output".to_string(),
+                    "acceptance_criteria".to_string(),
+                    "risks".to_string(),
+                    "fallback_triggers".to_string(),
+                    "conf".to_string(),
+                    "assumptions_made".to_string(),
+                    "open_questions".to_string(),
+                    "failed_checks".to_string(),
+                ],
+                fail_if_conf_less: 0.85,
+                if_conflict_or_missing: "reroute_to_router".to_string(),
+                if_test_failure: "reroute_to_builder_once_then_router".to_string(),
+            },
+        }
+    }
+}
+
+impl Default for ModelRolePolicy {
+    fn default() -> Self {
+        Self {
+            router: "gpt-5.4".to_string(),
+            info: "gpt-5.4-mini".to_string(),
+            build: "gpt-5.3-codex-spark".to_string(),
+            escalate_to: "gpt-5.4".to_string(),
+            conf_threshold: 0.85,
+        }
+    }
+}
+
+impl Default for StringCell {
+    fn default() -> Self {
+        Self::Value(String::new())
+    }
+}
+
+impl StringCell {
+    pub fn as_str(&self) -> &str {
+        match self {
+            Self::Value(value) => value,
+            Self::Cell { value } => value,
+        }
+    }
+
+    pub fn is_non_empty(&self) -> bool {
+        !self.as_str().trim().is_empty()
+    }
+}
+
+impl RoutingPolicy {
+    pub fn from_toon(content: &str) -> Result<Self> {
+        parse_toon(content)
+    }
+
+    pub fn parse_route(&self, handoff: &HandoffEnvelope) -> Result<PolicyDecision> {
+        handoff.validate_required_fields(self)?;
+
+        let conf_threshold = handoff.confidence_threshold.unwrap_or(self.policy.conf_threshold);
+        let effective_fail_threshold = self.gate.fail_if_conf_less.max(conf_threshold).max(self.policy.conf_threshold);
+        let conf = handoff.conf.unwrap_or(1.0);
+        if conf < effective_fail_threshold {
+            return Ok(PolicyDecision {
+                model: self.policy.escalate_to.clone(),
+                reason: "confidence below configured threshold".to_string(),
+            });
+        }
+
+        if handoff.is_security_or_reliability_sensitive() || handoff.is_contradictory_or_ambiguous() {
+            return Ok(PolicyDecision {
+                model: self.policy.escalate_to.clone(),
+                reason: "risk/contradiction or ambiguous classification requires router".to_string(),
+            });
+        }
+
+        if let Some(to_model) = handoff.to_model.as_ref().map(StringCell::as_str) {
+            let scope = handoff.scope.as_deref().unwrap_or_default();
+            let objective = handoff.objective.as_deref().unwrap_or_default();
+            if is_allowed_model(self, to_model)
+                && (is_builder_scope(scope) || is_info_scope(scope) || is_router_scope(scope))
+            {
+                return Ok(PolicyDecision {
+                    model: to_model.to_string(),
+                    reason: "handoff destination honored".to_string(),
+                });
+            }
+            if is_allowed_model(self, to_model)
+                && (is_builder_scope(objective) || is_info_scope(objective) || is_router_scope(objective))
+            {
+                return Ok(PolicyDecision {
+                    model: to_model.to_string(),
+                    reason: "handoff destination honored".to_string(),
+                });
+            }
+        }
+
+        let scope = handoff.scope.as_deref().unwrap_or_default();
+        let objective = handoff.objective.as_deref().unwrap_or_default();
+
+        let model = if is_router_scope(scope) || is_router_scope(objective) {
+            self.policy.router.clone()
+        } else if is_info_scope(scope) || is_info_scope(objective) {
+            self.policy.info.clone()
+        } else if is_builder_scope(scope) || is_builder_scope(objective) {
+            self.policy.build.clone()
+        } else {
+            self.routing
+                .decision_rules
+                .iter()
+                .find_map(|raw_rule| {
+                    parse_rule(raw_rule).and_then(|rule| {
+                        rule.matches(handoff).then_some(rule.model.clone())
+                    })
+                })
+                .unwrap_or_else(|| self.policy.router.clone())
+        };
+
+        let reason = if model == self.policy.info {
+            "matched information-gathering scope".to_string()
+        } else if model == self.policy.build {
+            "matched implementation/debugging scope".to_string()
+        } else if model == self.policy.router {
+            "matched architectural/decision scope".to_string()
+        } else {
+            format!(
+                "matched decision rule {}",
+                handoff.scope.as_deref().unwrap_or("unknown")
+            )
+        };
+
+        Ok(PolicyDecision { model, reason })
+    }
+}
+
+impl HandoffEnvelope {
+    pub fn from_toon(content: &str) -> Result<Self> {
+        parse_toon(content)
+    }
+
+    pub fn validate_required_fields(&self, policy: &RoutingPolicy) -> Result<()> {
+        let mut missing = Vec::new();
+
+        for field in &policy.gate.require_fields {
+            if !self.has_required_field(field) {
+                missing.push(field.to_string());
+            }
+        }
+
+        if !missing.is_empty() {
+            missing.sort();
+            missing.dedup();
+            bail!("missing required handoff fields: {}", missing.join(", "));
+        }
+
+        if !self.has_required_contract() {
+            bail!(
+                "handoff must include acceptance_criteria and verification fields (assumptions_made/open_questions/failed_checks)"
+            );
+        }
+
+        if let Some(conf) = self.conf
+            && !(0.0..=1.0).contains(&conf)
+        {
+            bail!("confidence value must be normalized in 0.0..1.0, got {conf}");
+        }
+
+        Ok(())
+    }
+
+    fn has_required_field(&self, field: &str) -> bool {
+        match field {
+            "task_id" => self.task_id.as_ref().is_some_and(StringCell::is_non_empty),
+            "from_model" => self.from_model.as_ref().is_some_and(StringCell::is_non_empty),
+            "to_model" => self.to_model.as_ref().is_some_and(StringCell::is_non_empty),
+            "objective" => self
+                .objective
+                .as_ref()
+                .is_some_and(|value| !value.trim().is_empty()),
+            "scope" => self
+                .scope
+                .as_ref()
+                .is_some_and(|value| !value.trim().is_empty()),
+            "constraints" => self.constraints.as_ref().is_some_and(|values| !values.is_empty()),
+            "current_state" => self
+                .current_state
+                .as_ref()
+                .is_some_and(|value| !value.trim().is_empty())
+                || self.context.state.as_ref().is_some_and(|value| !value.trim().is_empty()),
+            "artifact_refs" => self
+                .artifact_refs
+                .as_ref()
+                .is_some_and(|values| !values.is_empty())
+                || !self.context.refs.is_empty(),
+            "expected_output" => self.expected_output.as_ref().is_some_and(|value| !value.is_null()),
+            "acceptance_criteria" => {
+                self.acceptance_criteria.as_ref().is_some_and(|values| !values.is_empty())
+            }
+            "risks" => self.risks.as_ref().is_some_and(|values| !values.is_empty()),
+            "fallback_triggers" => {
+                self.fallback_triggers
+                    .as_ref()
+                    .is_some_and(|values| !values.is_empty())
+            }
+            "conf" => self.conf.is_some(),
+            "assumptions_made" => self
+                .assumptions_made
+                .as_ref()
+                .is_some_and(|values| !values.is_empty()),
+            "open_questions" => self
+                .open_questions
+                .as_ref()
+                .is_some_and(|values| !values.is_empty()),
+            "failed_checks" => self
+                .failed_checks
+                .as_ref()
+                .is_some_and(|values| !values.is_empty()),
+            // Compatibility aliases for earlier envelope versions.
+            "id" => self.task_id.as_ref().is_some_and(StringCell::is_non_empty),
+            "src" => self.from_model.as_ref().is_some_and(StringCell::is_non_empty),
+            "dst" => self.to_model.as_ref().is_some_and(StringCell::is_non_empty),
+            "obj" => self
+                .objective
+                .as_ref()
+                .is_some_and(|value| !value.trim().is_empty()),
+            "refs" => {
+                self.artifact_refs.as_ref().is_some_and(|values| !values.is_empty())
+                    || !self.context.refs.is_empty()
+            }
+            "out" => self.out.has_output(),
+            "criteria" => self.criteria.has_requirements(),
+            _ => true,
+        }
+    }
+
+    fn has_required_contract(&self) -> bool {
+        self.acceptance_criteria
+            .as_ref()
+            .is_some_and(|values| !values.is_empty())
+            && self
+                .assumptions_made
+                .as_ref()
+                .is_some_and(|values| !values.is_empty())
+            && self.open_questions.as_ref().is_some_and(|values| !values.is_empty())
+            && self.failed_checks.as_ref().is_some_and(|values| !values.is_empty())
+    }
+
+    fn is_router_scope_hint(&self) -> bool {
+        let scope = self.scope.as_deref().unwrap_or("");
+        let objective = self.objective.as_deref().unwrap_or("");
+        is_router_scope(scope) || is_router_scope(objective)
+    }
+
+    fn is_info_scope_hint(&self) -> bool {
+        let scope = self.scope.as_deref().unwrap_or("");
+        let objective = self.objective.as_deref().unwrap_or("");
+        is_info_scope(scope) || is_info_scope(objective)
+    }
+
+    fn is_builder_scope_hint(&self) -> bool {
+        let scope = self.scope.as_deref().unwrap_or("");
+        let objective = self.objective.as_deref().unwrap_or("");
+        is_builder_scope(scope) || is_builder_scope(objective)
+    }
+
+    fn is_security_or_reliability_sensitive(&self) -> bool {
+        let haystack = format!(
+            "{} {} {} {}",
+            self.scope.as_deref().unwrap_or_default(),
+            self.objective.as_deref().unwrap_or_default(),
+            self.risks
+                .as_ref()
+                .map(|items| items.join(" "))
+                .unwrap_or_default(),
+            self.fallback_triggers
+                .as_ref()
+                .map(|items| items.join(" "))
+                .unwrap_or_default()
+        );
+        is_security_signal(&haystack) || self
+            .artifact_refs
+            .as_ref()
+            .is_some_and(|refs| refs.iter().any(|value| {
+                let normalized = value.to_ascii_lowercase();
+                normalized.contains("security") || normalized.contains("privacy")
+            }))
+    }
+
+    fn is_contradictory_or_ambiguous(&self) -> bool {
+        let haystack = format!(
+            "{} {} {}",
+            self.scope.as_deref().unwrap_or_default(),
+            self.objective.as_deref().unwrap_or_default(),
+            self.fallback_triggers
+                .as_ref()
+                .map(|items| items.join(" "))
+                .unwrap_or_default(),
+        );
+        let is_contradictory = self
+            .failed_checks
+            .as_ref()
+            .is_some_and(|checks| {
+                checks
+                    .iter()
+                    .any(|value| is_contradictory_signal(value))
+            });
+        let is_ambiguous = is_ambiguous_signal(&haystack);
+        is_contradictory || is_ambiguous
+    }
+}
+
+impl ReviewEnvelope {
+    pub fn from_toon(content: &str) -> Result<Self> {
+        parse_toon(content)
+    }
+}
+
+impl HandoffOut {
+    fn has_output(&self) -> bool {
+        self.alternatives.is_some()
+            || self.recommended
+            || self.cite_sources
+            || self.format.is_some()
+            || self.diff_summary.unwrap_or(false)
+            || !self.files.is_empty()
+            || !self.tests.is_empty()
+            || self.risks.is_some()
+            || self.open_questions.is_some()
+    }
+}
+
+impl HandoffCriteria {
+    fn has_requirements(&self) -> bool {
+        !self.must.is_empty() || !self.fail.is_empty()
+    }
+}
+
+#[derive(Debug, Clone)]
+struct Rule {
+    left: String,
+    model: String,
+}
+
+impl Rule {
+    fn matches(&self, handoff: &HandoffEnvelope) -> bool {
+        if self
+            .left
+            .split("->")
+            .next()
+            .is_some_and(|left| contains_token_or_phrase(handoff.scope.as_deref().unwrap_or(""), left))
+        {
+            return true;
+        }
+
+        if contains_token_or_phrase(
+            handoff.objective.as_deref().unwrap_or(""),
+            &self.left,
+        ) {
+            return true;
+        }
+
+        handoff
+            .constraints
+            .as_ref()
+            .into_iter()
+            .flat_map(|constraints| constraints.iter())
+            .chain(self.artifact_refs_or_fallback(handoff).iter())
+            .chain(handoff.context.refs.iter())
+            .any(|value| contains_token_or_phrase(value, &self.left))
+    }
+
+    fn artifact_refs_or_fallback<'a>(&self, handoff: &'a HandoffEnvelope) -> &'a Vec<String> {
+        handoff.artifact_refs.as_ref().unwrap_or(&handoff.context.refs)
+    }
+}
+
+fn parse_rule(raw: &str) -> Option<Rule> {
+    let mut split = raw.splitn(2, "=>");
+    let left = split.next()?.trim().to_ascii_lowercase();
+    let model = split.next()?.trim().to_string();
+
+    if left.is_empty() || model.is_empty() {
+        return None;
+    }
+
+    Some(Rule { left, model })
+}
+
+fn is_router_scope(scope: &str) -> bool {
+    is_scope_like(scope, &[
+        "architectural",
+        "architectural_decision",
+        "triage",
+        "tradeoff",
+        "ambiguous",
+        "ambiguity",
+        "security",
+        "privacy",
+        "migration",
+        "incident",
+        "reliability",
+    ])
+}
+
+fn is_info_scope(scope: &str) -> bool {
+    is_scope_like(scope, &[
+        "research",
+        "information",
+        "information_gathering",
+        "docs",
+        "api",
+        "contract",
+        "comparison",
+        "search",
+        "options",
+        "lookup",
+        "investigate",
+    ])
+}
+
+fn is_builder_scope(scope: &str) -> bool {
+    is_scope_like(scope, &[
+        "code",
+        "code_change",
+        "implementation",
+        "refactor",
+        "tests",
+        "test",
+        "debug",
+        "debugging",
+        "execution",
+        "implement",
+        "build",
+    ])
+}
+
+fn is_scope_like(scope: &str, tokens: &[&str]) -> bool {
+    tokens.iter().any(|token| contains_token_or_phrase(scope, token))
+}
+
+fn is_security_signal(value: &str) -> bool {
+    is_token_or_phrase_in_set(
+        value,
+        &[
+            "security",
+            "privacy",
+            "migration",
+            "incident",
+            "data loss",
+            "data-loss",
+            "reliability",
+            "production",
+        ],
+    )
+}
+
+fn is_ambiguous_signal(value: &str) -> bool {
+    is_token_or_phrase_in_set(value, &["ambiguous", "ambiguity", "underdetermined", "unclear"])
+}
+
+fn is_contradictory_signal(value: &str) -> bool {
+    is_token_or_phrase_in_set(value, &["contradictory", "conflict", "inconsistent", "mismatch"])
+}
+
+fn is_token_or_phrase_in_set(value: &str, tokens: &[&str]) -> bool {
+    let normalized = value.to_ascii_lowercase();
+    tokens.iter().any(|token| contains_token_or_phrase(&normalized, token))
+}
+
+fn is_allowed_model(policy: &RoutingPolicy, model: &str) -> bool {
+    let m = model.trim();
+    m == policy.policy.router || m == policy.policy.info || m == policy.policy.build || m == policy.policy.escalate_to
+}
+
+fn contains_token_or_phrase(target: &str, expected: &str) -> bool {
+    if expected.is_empty() {
+        return false;
+    }
+    let normalized = target.to_ascii_lowercase();
+    normalized.split(|c: char| !c.is_ascii_alphanumeric() && c != '_')
+        .any(|token| !token.is_empty() && token == expected)
+        || normalized.contains(expected)
+}
+
+fn parse_toon<T: DeserializeOwned>(content: &str) -> Result<T> {
+    decode_default(content).context("failed to decode TOON payload")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    const POLICY_TP: &str = r##"
+policy:
+  router: "gpt-5.4"
+  info: "gpt-5.4-mini"
+  build: "gpt-5.3-codex-spark"
+  escalate_to: "gpt-5.4"
+  conf_threshold: 0.85
+routing:
+  decision_rules[4]: "architectural_decision => gpt-5.4", "debugging_or_testing => gpt-5.3-codex-spark", "docs_api_lookup_or_comparison => gpt-5.4-mini", "security_privacy_migration_incident => gpt-5.4"
+gate:
+  require_fields[15]: "task_id", "from_model", "to_model", "objective", "scope", "constraints", "current_state", "artifact_refs", "expected_output", "acceptance_criteria", "risks", "fallback_triggers", "conf", "assumptions_made", "open_questions", "failed_checks"
+  fail_if_conf_less: 0.85
+  if_conflict_or_missing: "reroute_to_router"
+  if_test_failure: "reroute_to_builder_once_then_router"
+"##;
+
+    const RESEARCH_ENVELOPE: &str = r##"
+task_id:
+  value: "task-102"
+from_model: "gpt-5.4"
+to_model: "gpt-5.4-mini"
+objective: "Find current API contract changes needed for feature X."
+scope: "information_gathering"
+constraints[3]: "no code edits", "cite source path", "stack: rust"
+current_state: "prior_work: baseline-impl-v2"
+artifact_refs[2]: "docs/api.md#120-210", "issues/412"
+expected_output:
+  alternatives: ["A", "B"]
+  recommended_choice: "A"
+  citations: ["docs/api.md#120-210"]
+acceptance_criteria[3]: "produce >=2 options", "explain tradeoffs", "state confidence"
+risks[1]: "none"
+fallback_triggers[1]: "none"
+conf: 0.88
+assumptions_made[1]: "contracts are stable"
+open_questions[1]: "none"
+failed_checks[1]: "none"
+"##;
+
+    const IMPLEMENTER_ENVELOPE: &str = r##"
+task_id:
+  value: "task-102"
+from_model: "gpt-5.4"
+to_model: "gpt-5.3-codex-spark"
+objective: "Implement the chosen API adjustment with tests."
+scope: "code_change"
+constraints[4]: "edit only listed files", "maintain existing behavior", "no_semver_break", "no code edits"
+current_state: "decision_outcome: option_a_selected"
+artifact_refs[3]: "research/task-102.toon", "src/current_api.ts", "tests/api.test.ts"
+expected_output:
+  code_diff_summary: "..."
+  test_plan: ["/ path"]
+  risk_notes: ["none"]
+  self_check: "ok"
+acceptance_criteria[3]: "tests added/updated", "no breaking change", "backwards behavior unchanged"
+risks[1]: "none"
+fallback_triggers[1]: "implementation validation required"
+conf: 0.9
+assumptions_made[1]: "prior research is current"
+open_questions[1]: "none"
+failed_checks[1]: "none"
+"##;
+
+    const REVIEW_ENVELOPE: &str = r##"
+from:
+  value: "gpt-5.3-codex-spark"
+to:
+  value: "gpt-5.4"
+review:
+  task: "task-102"
+  conf: 0.82
+  findings[3]: "assumptions", "risks", "validation_status"
+  required[3]: "evidence", "counterexamples", "alternative_if_incomplete"
+  gate: "approve | revise | escalate"
+"##;
+
+    #[test]
+    fn parses_policy_toon_and_routes_by_scope() {
+        let policy = RoutingPolicy::from_toon(POLICY_TP).expect("parse policy");
+        let handoff = HandoffEnvelope::from_toon(RESEARCH_ENVELOPE).expect("parse research");
+        assert_eq!(
+            policy
+                .parse_route(&handoff)
+                .expect("route")
+                .model,
+            "gpt-5.4-mini"
+        );
+    }
+
+    #[test]
+    fn validates_required_envelope_fields() {
+        let policy = RoutingPolicy::from_toon(POLICY_TP).expect("parse policy");
+        let handoff = HandoffEnvelope::from_toon(IMPLEMENTER_ENVELOPE).expect("parse implementer");
+        assert!(handoff.validate_required_fields(&policy).is_ok());
+    }
+
+    #[test]
+    fn escalates_when_confidence_below_threshold() {
+        let policy = RoutingPolicy::from_toon(POLICY_TP).expect("parse policy");
+        let mut handoff = HandoffEnvelope::from_toon(IMPLEMENTER_ENVELOPE).expect("parse implementer");
+        handoff.conf = Some(0.6);
+
+        let routed = policy.parse_route(&handoff).expect("route");
+        assert_eq!(routed.model, "gpt-5.4");
+    }
+
+    #[test]
+    fn escalates_on_security_signal() {
+        let policy = RoutingPolicy::from_toon(POLICY_TP).expect("parse policy");
+        let mut handoff = HandoffEnvelope::from_toon(IMPLEMENTER_ENVELOPE).expect("parse implementer");
+        handoff.risks = Some(vec!["privacy".to_string(), "migration".to_string()]);
+
+        let routed = policy.parse_route(&handoff).expect("route");
+        assert_eq!(routed.model, "gpt-5.4");
+    }
+
+    #[test]
+    fn parses_review_envelope_toon() {
+        let review = ReviewEnvelope::from_toon(REVIEW_ENVELOPE).expect("parse review");
+        assert_eq!(review.from.as_str(), "gpt-5.3-codex-spark");
+        assert_eq!(review.review.task.as_deref(), Some("task-102"));
+    }
+}