feat: 3 person review

This commit is contained in:
eric
2026-04-04 13:12:17 +02:00
parent d360a2b23c
commit 32147d4552
50 changed files with 2398 additions and 660 deletions

View File

@@ -1,9 +1,9 @@
engine: "data-driven-v1" engine: "data-driven-v1"
goal_file: ".agent/controllers/teamwise-prompt-lens/goal.md" goal_file: ".agent/controllers/longview-planner/goal.md"
plan_file: ".agent/controllers/teamwise-prompt-lens/plan.toon" plan_file: ".agent/controllers/longview-planner/plan.toon"
state_file: ".agent/controllers/teamwise-prompt-lens/state.toon" state_file: ".agent/controllers/longview-planner/state.toon"
standards_file: ".agent/controllers/teamwise-prompt-lens/standards.md" standards_file: ".agent/controllers/longview-planner/standards.md"
branch: "codex/teamwise-prompt-lens" branch: "codex/longview-planner"
continue_until: "fixed-point" continue_until: "fixed-point"
max_runs: 12 max_runs: 12
max_wall_clock: 4h max_wall_clock: 4h

View File

@@ -1,3 +0,0 @@
# Goal
Describe the goal for this controller.

View File

@@ -1,3 +0,0 @@
version: 1
goal_summary: No plan yet
steps[0]:

View File

@@ -1,5 +0,0 @@
# Standards
- Keep code maintainable.
- Avoid one-off hacks.
- Leave tests green.

View File

@@ -1,21 +0,0 @@
version: 1
phase: planning
goal_status: unknown
goal_revision: 0
current_step_id: null
iteration: 0
replan_required: false
completed_steps[0]:
blocked_steps[0]:
last_verification: null
last_cleanup_summary: null
last_full_test_summary: null
history[0]:
notes[0]:
planning_session:
pending_question: null
transcript[0]:
started_at: "1775272586"
last_usage_refresh_at: "1775272706"
last_usage_input_tokens: null
last_usage_output_tokens: null

View File

@@ -1,4 +0,0 @@
# Goal
Rewrite `codex-controller-loop` as a Rust TUI-first autonomous controller with TOON-backed machine state and a hard planning/execution phase boundary.

View File

@@ -1,3 +0,0 @@
version: 1
goal_summary: Rust TUI-first autonomous controller
steps[0]:

View File

@@ -1,8 +0,0 @@
# Standards
- Keep the Rust code modular and readable.
- Treat planning as the only user-input phase.
- Treat execution as autonomous except for pause, resume, stop, and goal update.
- Keep controller-owned machine state in TOON files.
- Leave the codebase in a maintainable state after each completed step.

View File

@@ -1,17 +0,0 @@
version: 1
phase: planning
goal_status: unknown
goal_revision: 0
current_step_id: null
iteration: 0
replan_required: false
completed_steps[0]:
blocked_steps[0]:
last_verification: null
last_cleanup_summary: null
last_full_test_summary: null
history[0]:
notes[0]:
planning_session:
pending_question: null
transcript[0]:

View File

@@ -1,20 +0,0 @@
# Goal
Identify oversized, hand-maintained source files in the repository, prioritize the highest-value refactor targets, and split them into smaller, cohesive modules without changing external behavior.
A file should be considered a refactor candidate when it is materially large or overloaded, using these default signals:
- More than 300 lines of hand-written code.
- Multiple unrelated responsibilities in one file.
- Difficult-to-test logic mixed with I/O, UI, routing, state wiring, or formatting.
Execution requirements:
- Ignore generated, vendored, build, cache, and lock files unless the repository clearly treats them as hand-maintained source.
- Refactor incrementally, one target at a time, starting with the largest safe candidate.
- Preserve public APIs and user-visible behavior unless a compatibility adjustment is required to complete the split safely.
- Leave the repository in a clean, test-passing state.
Expected outputs:
- Smaller files with clearer ownership boundaries.
- Any necessary import/export or module wiring updates.
- Tests updated or added when needed to preserve behavior.
- A concise summary of files split, new module boundaries, and verification results.

View File

@@ -1,94 +0,0 @@
version: 1
goal_summary: "Audit the repository for oversized hand-maintained source files, prioritize safe high-value refactor targets, split them into smaller cohesive modules, and finish with passing validation and a clean diff."
steps[6]:
- id: s1
title: Establish Safe Refactor Scope
purpose: "Load controller inputs and repository constraints, confirm the working tree state, and define the exact file-selection rules so execution can proceed without ambiguity."
inputs[4]: ".agent/controllers/keystone-seam-audit/goal.md",".agent/controllers/keystone-seam-audit/standards.md",repository working tree,"repo-level instructions such as AGENTS.md if present"
outputs[3]: confirmed execution constraints,candidate exclusion rules,list of locally modified files to avoid or handle carefully
dependencies[0]:
verification[2]:
- label: Check working tree state
commands[1]: "git status --short"
- label: Locate repo instructions
commands[1]: "rg --files -g 'AGENTS.md' -g '.agent/**'"
cleanup_requirements[1]{label,description}:
No accidental overlap with user edits,Do not modify files with unrelated local changes unless the change is required and the existing edits are understood and preserved.
status: active
attempts: 1
- id: s2
title: "Inventory Large Hand-Maintained Files"
purpose: "Produce a ranked inventory of oversized source files while excluding generated and third-party material."
inputs[2]: repository file list,selection thresholds from goal and standards
outputs[3]: ranked candidate list with line counts,"excluded-path list",initial top refactor targets
dependencies[1]: s1
verification[2]:
- label: Enumerate tracked files
commands[1]: "rg --files"
- label: Rank large files by line count
commands[1]: "python - <<'PY'\nimport os, subprocess\nexclude = {'node_modules','dist','build','coverage','.git','.next','.svelte-kit','target','vendor'}\nfiles = subprocess.check_output(['rg','--files']).decode().splitlines()\nrows = []\nfor f in files:\n parts = set(f.split('/'))\n if parts & exclude:\n continue\n if os.path.splitext(f)[1] in {'.png','.jpg','.jpeg','.gif','.svg','.lock','.snap','.min.js','.map'}:\n continue\n try:\n with open(f,'r',encoding='utf-8') as fh:\n n = sum(1 for _ in fh)\n except Exception:\n continue\n if n > 300:\n rows.append((n,f))\nfor n,f in sorted(rows, reverse=True)[:50]:\n print(f'{n}\\t{f}')\nPY"
cleanup_requirements[1]{label,description}:
Discard false positives,"Remove generated files, migration dumps, fixtures, and machine-authored artifacts from the candidate list before choosing targets."
status: todo
attempts: 0
- id: s3
title: Choose Refactor Order And Boundaries
purpose: "Inspect the largest candidates, decide which files are safe and high-value to split first, and define intended module boundaries before editing."
inputs[3]: ranked candidate list,current file contents,existing module structure
outputs[3]: ordered target list,boundary notes for each target,"explicit non-goals for each refactor"
dependencies[1]: s2
verification[2]:
- label: Inspect top candidates
commands[2]: "sed -n '1,220p' <top-candidate-file>","sed -n '221,440p' <top-candidate-file>"
- label: Map exports and dependents
commands[1]: "rg -n \"from ['\\\"]|require\\(\" <candidate-basename-or-symbols>"
cleanup_requirements[1]{label,description}:
"Avoid cosmetic-only churn","Do not split files purely by line count; only proceed when coherent seams such as utilities, domain logic, adapters, routes, or components are identifiable."
status: todo
attempts: 0
- id: s4
title: Refactor First Target Incrementally
purpose: "Split the highest-priority candidate into smaller cohesive files while preserving behavior and keeping the change reviewable."
inputs[3]: first target file,boundary notes,repo conventions
outputs[3]: new smaller modules,updated imports/exports,"target-specific validation result"
dependencies[1]: s3
verification[2]:
- label: Run targeted tests or checks
commands[1]: "<smallest-relevant-test-or-lint-command>"
- label: Confirm file size reduction
commands[1]: "wc -l <affected-files>"
cleanup_requirements[1]{label,description}:
Remove temporary seams,"Delete transitional helpers, dead exports, and unused imports created during the split before moving on."
status: todo
attempts: 0
- id: s5
title: "Repeat For Remaining High-Value Targets"
purpose: "Continue the same refactor pattern for additional oversized files until the main high-value targets are addressed or diminishing returns are reached."
inputs[2]: remaining ordered targets,lessons from first refactor
outputs[3]: additional split modules,updated dependency wiring,"per-target validation notes"
dependencies[1]: s4
verification[2]:
- label: "Run per-target validation after each split"
commands[1]: "<smallest-relevant-test-or-lint-command>"
- label: Track remaining oversized files
commands[1]: "python - <<'PY'\nimport os, subprocess\nexclude = {'node_modules','dist','build','coverage','.git','.next','.svelte-kit','target','vendor'}\nfiles = subprocess.check_output(['rg','--files']).decode().splitlines()\nfor f in files:\n parts = set(f.split('/'))\n if parts & exclude:\n continue\n try:\n with open(f,'r',encoding='utf-8') as fh:\n n = sum(1 for _ in fh)\n except Exception:\n continue\n if n > 300:\n print(f'{n}\\t{f}')\nPY"
cleanup_requirements[1]{label,description}:
Stop at sensible boundary,"Do not keep splitting once modules are cohesive and maintainable; leave well-structured files intact even if they remain moderately large."
status: todo
attempts: 0
- id: s6
title: Run Full Validation And Final Cleanup
purpose: "Verify repository health, remove leftover refactor debris, and produce a concise execution summary for the controller result."
inputs[2]: all refactor changes,repository validation commands
outputs[3]: final passing validation results,cleaned diff,"summary of targets, seams, and tests"
dependencies[1]: s5
verification[2]:
- label: Run broadest available validation
commands[3]: "<repo-test-command>","<repo-lint-command-if-available>","<repo-build-or-typecheck-command-if-available>"
- label: Check for leftover issues
commands[2]: "git diff --check","git status --short"
cleanup_requirements[2]{label,description}:
Leave clean refactor artifacts,"Remove unused files, stale exports, dead code, and temporary comments; ensure only intentional source changes remain."
Record outcome,"Summarize which files were split, the new module boundaries, any tests added or updated, and any remaining large files intentionally left unchanged."
status: todo
attempts: 0

View File

@@ -1,14 +0,0 @@
# Standards
- Prefer cohesive modules with a single clear responsibility over line-count-only splitting.
- Preserve behavior first; do not mix feature work with the refactor.
- Avoid one-off abstractions. New modules must reflect real ownership boundaries that the codebase can keep using.
- Keep public interfaces stable unless a minimal coordinated update is required.
- Exclude generated, vendored, compiled, snapshot, fixture, and lock files from refactor targets unless explicitly treated as source by the repo.
- Before editing, check for existing local changes in candidate files and avoid overwriting user work.
- Refactor in small, reviewable increments so each target can be validated independently.
- Run the smallest relevant test or lint command after each target, then run the broadest available final validation before finishing.
- If no automated tests exist for a risky seam, add or strengthen focused tests before or during the refactor when practical.
- Keep names, module placement, and imports maintainable and consistent with existing repo conventions.
- Remove dead helpers, obsolete re-exports, and temporary scaffolding introduced during the split.
- Leave tests green.

View File

@@ -1,25 +0,0 @@
version: 1
phase: blocked
goal_status: blocked
goal_revision: 1
current_step_id: null
iteration: 0
replan_required: false
completed_steps[0]:
blocked_steps[0]:
last_verification: null
last_cleanup_summary: null
last_full_test_summary: null
history[0]:
notes[1]: No actionable step remained and autonomous replan produced nothing.
planning_session:
pending_question: null
transcript[4]{role,content}:
user,test
assistant,"What should the `keystone-seam-audit` controller actually do? Please specify the target system/repo area, what a \"seam audit\" should inspect, and the expected output artifact(s)."
user,find large files and refactor to smaller files
assistant,Planning completed
started_at: "1775273562"
last_usage_refresh_at: "1775275327"
last_usage_input_tokens: null
last_usage_output_tokens: null

View File

@@ -0,0 +1,4 @@
## Goal
- Improve the new-task goal planner so every newly created task is processed through a consistent three-stage personality chain: Product Owner, Senior Engineer, and Senior Maintainer.
- Use this chain to strengthen requirement clarity, implementation quality, and long-term iterability before a task enters the execution/verifier loop.
- Enforce planning outputs that bias maintainable design over short-term fixes, reducing the risk of test-passing but brittle implementations.

View File

@@ -0,0 +1,82 @@
version: 1
goal_summary: "Implement a structured three-persona planning pipeline for new task creation and wire it into quality gates that prevent spaghetti-like outcomes."
steps[6]:
- id: "step-1"
title: "Map current goal-planning execution path"
purpose: "Identify where new tasks are created and where planner output is generated, stored, and consumed."
notes: This is required to avoid adding persona logic in the wrong layer and breaking existing task creation contracts.
inputs[1]: "Current new-task creation flow"
outputs[1]: Execution map of planner entry/egress points
dependencies[0]:
verification[1]:
- label: Baseline understanding
commands[3]: Trace task creation path from command/API input to persisted task goal,"Locate planner modules, serializers, and validation hooks",Document current persona/prompt hooks (if any)
cleanup_requirements[0]:
status: done
attempts: 1
- id: "step-2"
title: Reset stale active state
purpose: "Re-run finalization of three-persona planner sequencing before execution resumes"
notes: "Recovered stale active step state required step-2 to be retried before proceeding."
inputs[0]:
outputs[0]:
dependencies[0]:
verification[0]:
cleanup_requirements[0]:
status: done
attempts: 1
- id: "step-3"
title: Implement sequential persona pipeline in goal planner
purpose: "Apply PO -> Senior Engineer -> Senior Maintainer transformations and merge outputs into a single maintainability-first goal artifact."
notes: Recovery shows planner execution was reset; this step is now the active unblocked execution path. Controller recovered this step from stale active state and returned it to todo.
inputs[1]: Persona contract and config
outputs[2]: Updated planner pipeline implementation,Deterministic merged goal artifact
dependencies[1]: "step-2"
verification[1]:
- label: Determinism
commands[3]: Add unit tests asserting fixed output ordering and stable merge for same input seed/state,Add tests for conflict resolution and fallback behavior,... 1 more omitted
cleanup_requirements[1]{label,description}:
Safe integration,Remove inline prompt branching that bypasses the new pipeline.
status: done
attempts: 1
- id: "step-4"
title: "Reopen step-4 implementation"
purpose: Complete remaining implementation work before downstream validation/rollout tasks.
notes: "Recovery flagged step-4 as stale-active; it must be treated as unfinished to keep planner gating chain valid."
inputs[0]:
outputs[0]:
dependencies[0]:
verification[0]:
cleanup_requirements[0]:
status: done
attempts: 1
- id: "step-5"
title: Add regression tests and rollout docs
purpose: "Validate the three-stage PO→Senior Engineer→Senior Maintainer flow with deterministic templates, explicit constraints, and maintenance-focused rejection/annotation behavior before proceeding."
notes: "Keep this step as the first blocker: lock in behavioral contracts and anti-bypass coverage, then add concise docs so future work cannot drift into one-off prompts. Controller recovered this step from stale active state and returned it to todo."
inputs[2]: Implemented pipeline and gates,"Three-stage persona templates and merge rules"
outputs[3]: Regression tests for staged transformations,"Tests for downgrade/reject/delta-capture logic that flags maintenance-risk solutions",Developer docs for staged goal generation and constraints
dependencies[1]: "step-4"
verification[1]:
- label: Regression safety
commands[3]: "Run new-task planner unit/integration test set for all 3 passes","Run decision-path tests for reject/downgrade/annotate outcomes","Run existing task-creation and verifier-loop regression tests"
cleanup_requirements[1]{label,description}:
Documentation clarity,"Delete stale references to old single-pass planning behavior."
status: done
attempts: 1
- id: "step-6"
title: "Enable three-stage persona telemetry and rollout guardrails"
purpose: "Add immutable planner instrumentation and hard enforcement for product-owner, senior-engineer, senior-maintainer stage transitions before execution."
notes: "Implement stage event emission (`Persona stage`, `stage-1`, `stage-2`, `stage-3`) with deterministic persona-pass metadata; block single-pass/bypass paths so every new-task goal goes through all three stages; keep this guardrail work separate from doc/test-generation output paths."
inputs[0]:
outputs[0]:
dependencies[1]: "step-4"
verification[2]:
- label: Stage telemetry contract
commands[2]: "Emit planner session events for every pass including persona template version, constraints, risks, architectural context, and acceptance criteria","Emit explicit `Persona stage` and `stage-1/2/3` transitions from the same approved pipeline path for all new-task goals"
- label: Guardrail enforcement
commands[2]: "Reject execution when single-pass planning or stage skipping is detected","Add rollout counters for rejected/annotated goals by failure type and stage, and alert on threshold breaches"
cleanup_requirements[1]{label,description}:
Operational overhead,"Disable debug-level per-task instrumentation in non-debug environments once the new telemetry/guards are stable."
status: done
attempts: 1

View File

@@ -0,0 +1,10 @@
## Standards
- Preserve and improve maintainability by requiring architecture-aware task goals, explicit constraints, and explicit iteration context.
- Keep persona behavior deterministic with versioned templates, ordered composition, and stable merge rules.
- Reject, downgrade, or flag goals that optimize correctness only and create high future maintenance cost.
- Avoid one-off hacks and ad-hoc prompt bypasses; route all goal generation through the same approved planner flow.
- Add lightweight observability for each persona transformation and keep existing verification and tests green.
- Staged goal generation must follow this exact order: product-owner, senior-engineer, senior-maintainer.
- Emit all stage transitions as planner session events (`Persona stage`, `stage-1..3`) and include persona pass metadata.
- Enforce persona-pass contracts with explicit constraints, risks, and architecture-aware acceptance criteria in every stage.
- Keep single-pass behavior out of the rollout docs: single-pass planning is no longer an accepted path for new tasks.

View File

@@ -0,0 +1,49 @@
version: 1
phase: done
stop_reason: null
goal_status: done
goal_revision: 1
current_step_id: null
iteration: 8
replan_required: true
completed_steps[8]: "step-1","step-2","step-2","step-3","step-4","step-4","step-5","step-6"
blocked_steps[0]:
last_verification:
passed: true
summary: No commands requested
commands[0]:
output[0]:
last_cleanup_summary:
passed: true
summary: "Cleanup accepted for step-6"
commands[0]:
output[5]: Did not run verification or tests per your execution constraints.,"Single-pass legacy schema outputs are still parsed for compatibility but are rejected by the new pipeline contract check before acceptance.","If you want, next step is to add targeted tests for the new counter/threshold paths in forwarder.","",next_step_not_required
last_full_test_summary:
passed: true
summary: No commands requested
commands[0]:
output[0]:
history[8]{timestamp,kind,detail}:
"1775299756","step-complete","Completed step-1"
"1775299853","step-complete","Completed step-2"
"1775299897","step-complete","Completed step-2"
"1775300068","step-complete","Completed step-3"
"1775300173","step-complete","Completed step-4"
"1775300488","step-complete","Completed step-4"
"1775300610","step-complete","Completed step-5"
"1775300806","step-complete","Completed step-6"
notes[4]: "Recovered stale active step state for longview-planner. Reset step-2 to todo.","Recovered stale active step state for longview-planner. Reset step-3 to todo.","Recovered stale active step state for longview-planner. Reset step-4 to todo.","Recovered stale active step state for longview-planner. Reset step-5 to todo."
planning_session:
pending_question: null
transcript[2]{role,content}:
user,"Improve the goal planner when creating a new task. It should pass through a PO personality, a senior engineer personality, a engineer who works on the codebase in 2 years. This is to improve code and prevent the loop from writing spaghetti code that passes verification and tests barely, but is unable to be iterated upon"
assistant,Planning completed
started_at: "1775299715"
last_usage_refresh_at: "1775300358"
last_usage_input_tokens: null
last_usage_output_tokens: null
last_usage_primary_window: null
last_usage_secondary_window: null
run_model: "gpt-5.3-codex-spark"
fast_mode: true
allow_branching: false

View File

@@ -1,5 +0,0 @@
# Goal
Refactor the oversized Rust modules in this repository into smaller, focused directory modules without changing runtime behavior, controller flow, or persisted controller file formats.
Prioritize `src/ui/mod.rs`, `src/model.rs`, `src/process.rs`, `src/storage/toon.rs`, `src/app/workspace_input.rs`, and `src/app/runtime.rs`. End with thin `mod.rs` facades, clear ownership boundaries, stable public APIs or `pub use` reexports where they reduce churn, colocated tests for the moved logic, and a green Rust verification pass.

View File

@@ -1,130 +0,0 @@
version: 6
goal_summary: "Refactor the remaining oversized Rust modules into focused directory modules with thin facades, stable public APIs, colocated tests, unchanged controller behavior and persisted formats, then finish with full Rust verification."
steps[8]:
- id: guardrails
title: Add Refactor Guardrails
purpose: Lock down the current behavior that must survive file moves before changing module structure.
notes: "Completed. Guardrail coverage exists for model, process, storage, app, and UI behavior, including serialized cwd-sensitive TOON tests and the cached session-view shape used by app/UI call sites."
inputs[7]: src/model.rs,src/process.rs,src/storage/toon.rs,src/app/tests.rs,src/ui/mod.rs,src/app/session.rs,src/app/mod.rs
outputs[5]: Focused tests covering plan/state/schema behavior in the model boundary,"Focused tests covering codex event parsing, usage snapshots, and stderr filtering","Focused tests covering TOON roundtrips, controller discovery, controller id normalization, and cwd-sensitive discovery flows","Focused app tests covering planning command gating, scrolling, and submission behavior","Focused UI tests covering screen rendering, wrapping, and session selection extraction"
dependencies[0]:
verification[1]:
- label: Targeted guardrail tests
commands[5]: "cargo test -q model::tests","cargo test -q process::tests","cargo test -q storage::toon::tests","cargo test -q app::tests","cargo test -q ui::tests"
cleanup_requirements[2]{label,description}:
Colocate new tests,"Keep each new or moved test with the module that owns the behavior instead of adding another catch-all test file."
Avoid duplicate fixtures,Reuse existing sample app and model fixtures where possible so the refactor does not create parallel test scaffolding.
status: done
attempts: 1
- id: "model-modules"
title: Split Shared Model Types
purpose: "Refactor `src/model.rs` into focused submodules while preserving the existing `crate::model::*` surface."
notes: "Completed. `src/model.rs` is now a directory facade with focused submodules, stable reexports, and colocated tests protecting schemas, plan helpers, session views, and response types."
inputs[1]: src/model.rs
outputs[8]: src/model/mod.rs facade reexporting the stable public model API,"src/model/controller.rs for screen, phase, goal status, step status, and controller state types","src/model/plan.rs for task config, plan structs, and plan mutation helpers","src/model/session.rs for session enums, grouping, cursor, cached session-view, and selection helpers",src/model/usage.rs for usage and status snapshot types,"src/model/response.rs for planner, executor, and controller summary response types",src/model/schema.rs for JSON schema builders,Model tests moved beside their owning submodules
dependencies[1]: guardrails
verification[1]:
- label: Model regression tests
commands[6]: "cargo test -q model::controller::tests","cargo test -q model::plan::tests","cargo test -q model::session::tests","cargo test -q model::schema::tests","cargo test -q model::usage::tests","cargo test -q"
cleanup_requirements[3]{label,description}:
Thin facade,Leave `src/model/mod.rs` as a reexport surface rather than reintroducing large inline implementations there.
Serde and schema parity,"Keep existing serde attributes, defaults, and JSON schema output unchanged while splitting the code."
No stale aliases,"Remove transitional imports or compatibility types that only mirror the old single-file layout once the facade exports are wired."
status: done
attempts: 3
- id: "process-modules"
title: Split Process Execution And Parsing
purpose: "Refactor `src/process.rs` into focused modules for codex execution, shell execution, usage snapshots, and event parsing."
notes: "Completed. `src/process.rs` was replaced with focused modules and a stable facade. Public entry points remain unchanged, targeted parser and usage tests pass, and no new process-specific warnings were introduced."
inputs[2]: src/process.rs,src/model/mod.rs
outputs[6]: src/process/mod.rs facade preserving the current public functions,src/process/codex.rs for `run_codex_with_schema` and controller id generation,src/process/shell.rs for shell command execution and command summaries,src/process/usage.rs for usage snapshot helpers,src/process/parser.rs for codex JSON line parsing and rendering helpers,Process parsing tests kept with the parser and usage modules
dependencies[2]: guardrails,"model-modules"
verification[1]:
- label: Process regression tests
commands[3]: "cargo test -q process::parser::tests","cargo test -q process::usage::tests","cargo test -q"
cleanup_requirements[3]{label,description}:
Stable process API,"Preserve the current `crate::process::*` entry points so controller and app call sites stay unchanged."
No parsing drift,"Keep command, tool, thinking, usage, and stderr event behavior identical after extraction."
Cull moved dead code,Delete obsolete inline helpers in the old file instead of leaving duplicate parser or usage logic behind.
status: done
attempts: 1
- id: "storage-toon-modules"
title: Split TOON Persistence Helpers
purpose: "Refactor `src/storage/toon.rs` into focused persistence, discovery, codec, and id helper modules without changing file formats."
notes: "Completed. `src/storage/toon.rs` is now a focused directory module with stable facade exports, shared codec helpers, preserved cwd-sensitive discovery behavior, and passing targeted plus full test runs."
inputs[2]: src/storage/toon.rs,src/model/mod.rs
outputs[6]: src/storage/toon/mod.rs facade preserving the current storage API,src/storage/toon/files.rs for controller file creation and markdown read/write helpers,src/storage/toon/codec.rs for shared TOON read and write helpers,"src/storage/toon/controllers.rs for controller creation, listing, summaries, discovery, and timestamp helpers","src/storage/toon/ids.rs for normalization, uniqueness, fallback, and suffix helpers","Storage tests split between codec, discovery, and id modules"
dependencies[2]: guardrails,"model-modules"
verification[1]:
- label: Storage regression tests
commands[5]: "cargo test -q storage::toon::codec::tests","cargo test -q storage::toon::controllers::tests","cargo test -q storage::toon::ids::tests","cargo test -q storage::toon::tests","cargo test -q"
cleanup_requirements[3]{label,description}:
No format drift,"Keep persisted markdown and TOON content shape byte-compatible except for harmless formatting already produced by existing helpers."
Single codec path,Route all TOON encoding and decoding through shared codec helpers instead of leaving duplicate inline file logic.
"Preserve cwd-sensitive tests","Keep the existing cwd mutex and discovery test discipline intact so file-system behavior stays deterministic."
status: done
attempts: 1
- id: "app-runtime-modules"
title: Split App Runtime Lifecycle
purpose: "Refactor `src/app/runtime.rs` into focused modules for workspace lifecycle, runtime events, and usage refresh while keeping `App` behavior stable."
notes: "Completed. `src/app/runtime.rs` now routes through focused runtime modules with stable `impl App` entry points, expanded colocated tests, and a clean repository-wide verification pass after extraction."
inputs[5]: src/app/runtime.rs,src/app/mod.rs,src/process/mod.rs,src/storage/toon/mod.rs,src/app/session.rs
outputs[5]: src/app/runtime/mod.rs coordinating the runtime submodules,"src/app/runtime/workspace.rs for open, create, load, picker refresh, and shutdown flows",src/app/runtime/events.rs for draining and applying runtime events plus local session entry helpers,src/app/runtime/usage.rs for usage refresh and state persistence,"Expanded app tests covering event application, usage refresh, workspace open flows, and cached-session reconstruction"
dependencies[4]: guardrails,"model-modules","process-modules","storage-toon-modules"
verification[1]:
- label: App runtime regression tests
commands[5]: "cargo test -q app::runtime::events::tests","cargo test -q app::runtime::usage::tests","cargo test -q app::runtime::workspace::tests","cargo test -q app::tests","cargo test -q"
cleanup_requirements[3]{label,description}:
Stable App methods,Keep the current `impl App` method names and external call sites intact while moving their bodies into submodules.
Single hydration path,Avoid duplicating workspace state initialization and usage snapshot reconstruction across runtime modules.
"Contain runtime-only logic",Do not let runtime extraction leak storage or process implementation details into unrelated app modules.
status: done
attempts: 1
- id: "app-workspace-input-modules"
title: Split Workspace Input Handling
purpose: "Refactor `src/app/workspace_input.rs` into focused keyboard, mouse, command, and submission modules while preserving interaction behavior."
notes: "Next execution step. Runtime seams are now stable, so extract interaction logic by ownership boundary while keeping slash commands, planning-mode gating, drag selection, scroll behavior, follow-output resets, warning text, and submission side effects unchanged. Controller recovered this step from stale active state and returned it to todo."
inputs[5]: src/app/workspace_input.rs,src/app/mod.rs,src/app/tests.rs,src/ui/mod.rs,src/app/runtime/mod.rs
outputs[6]: src/app/workspace_input/mod.rs coordinating workspace input entry points,"src/app/workspace_input/mouse.rs for selection, drag, and wheel handling",src/app/workspace_input/keyboard.rs for key dispatch and navigation,src/app/workspace_input/commands.rs for slash command handling and planning mode gating,src/app/workspace_input/submission.rs for user message submission and local session entry creation,"Expanded colocated tests covering slash commands, selection, drag, scroll, follow-output reset behavior, and submission ordering"
dependencies[2]: guardrails,"app-runtime-modules"
verification[1]:
- label: Workspace input regression tests
commands[4]: "cargo test -q app::workspace_input::commands::tests","cargo test -q app::workspace_input::submission::tests","cargo test -q app::tests","cargo test -q"
cleanup_requirements[3]{label,description}:
Keep command strings stable,"Do not change existing slash commands, warning text, or planning-mode restrictions during the split."
No duplicated reset logic,"Centralize selection and follow-output resets instead of copying the same workspace cleanup code into each input module."
Preserve local entry creation,Keep local session entry generation and submission ordering identical so UI and persistence behavior do not drift.
status: active
attempts: 1
- id: "ui-modules"
title: Split UI Rendering Helpers
purpose: Refactor `src/ui/mod.rs` into focused rendering modules while preserving the current exported helpers and TUI behavior.
notes: "Start after workspace input extraction settles the app-facing seams. Keep `src/ui/mod.rs` as a thin facade, preserve wrapping and selection math exactly, and remove the dead `session_row_cells` helper or relocate its behavior into the owning session-rendering module so no dead-code warning survives."
inputs[5]: src/ui/mod.rs,src/ui/scroll.rs,src/app/mod.rs,src/model/mod.rs,src/app/workspace_input/mod.rs
outputs[9]: src/ui/mod.rs thin facade exporting the stable UI entry points,"src/ui/theme.rs for colors, shared styles, and shell block helpers","src/ui/layout.rs for `WorkspaceLayout`, `SessionView`, and layout calculations",src/ui/picker.rs for controller picker rendering,"src/ui/create_controller.rs for create-controller screen rendering",src/ui/workspace.rs for workspace screen orchestration,"src/ui/session.rs for session row rendering, wrapping, and selection extraction helpers","src/ui/sidebar.rs for plan board, status line, and composer helper rendering",UI tests updated to target the new owning modules without changing rendered behavior
dependencies[4]: guardrails,"model-modules","app-runtime-modules","app-workspace-input-modules"
verification[1]:
- label: UI regression tests
commands[4]: "cargo test -q ui::layout::tests","cargo test -q ui::session::tests","cargo test -q ui::tests","cargo test -q"
cleanup_requirements[3]{label,description}:
Thin facade,Keep `src/ui/mod.rs` limited to module wiring and stable reexports instead of leaving business logic there.
Preserve selection math,"Keep wrapping, selection clipping, and copied session text behavior identical after extraction."
Remove refactor leftovers,Delete dead rendering helpers and stale imports introduced or exposed by the module split.
status: todo
attempts: 0
- id: "final-integration"
title: Run Final Cleanup And Verification
purpose: "Reconcile imports and module wiring, remove leftover compatibility code, and run the full repository quality gate."
notes: "Final pass after the remaining app and UI splits. Clean wiring and refactor leftovers, confirm that stable APIs remain intact, and rerun the full required verification set after any final lint-driven cleanup that does not change behavior."
inputs[7]: src/model/mod.rs,src/process/mod.rs,src/storage/toon/mod.rs,src/app/runtime/mod.rs,src/app/workspace_input/mod.rs,src/ui/mod.rs,Cargo.toml
outputs[3]: Updated module declarations and imports across `src/`,"Removed dead helpers, stale imports, and compatibility shims left from the large-file split","Green formatting, test, and clippy verification for the refactor"
dependencies[6]: "model-modules","process-modules","storage-toon-modules","app-runtime-modules","app-workspace-input-modules","ui-modules"
verification[1]:
- label: Full verification
commands[3]: "cargo fmt --check","cargo test -q","cargo clippy -q --all-targets --all-features"
cleanup_requirements[3]{label,description}:
Remove leftovers,Delete obsolete inline helpers and transitional reexports once the new module structure is wired in cleanly.
Keep structure intentional,"Do not leave empty modules or one-off directories after the refactor is complete."
No warning regressions,"Do not introduce new dead-code or stale-import warnings as part of the refactor cleanup."
status: todo
attempts: 0

View File

@@ -1,9 +0,0 @@
# Standards
- Preserve existing behavior, controller orchestration, TUI interactions, and on-disk `.md` and `.toon` controller formats throughout the refactor.
- Prefer focused directory modules when a file mixes responsibilities or grows past roughly 300 lines, and keep `mod.rs` files as thin facades or reexport surfaces.
- Keep public call sites stable unless a narrower API is clearly better, using `pub use` reexports to avoid unnecessary churn.
- Split code by ownership boundary: model/state/schema concerns, process execution and parsing, TOON persistence and controller discovery, app runtime lifecycle, workspace input handling, and UI rendering helpers.
- Move or add focused tests with the code they protect, especially around model schemas, session grouping and selection, process parsing, storage discovery and id generation, runtime event handling, workspace commands, and UI rendering helpers.
- Remove dead helpers, stale imports, and compatibility layers that only mirror the old file layout.
- Finish with `cargo fmt --check`, `cargo test -q`, and `cargo clippy -q --all-targets --all-features` passing.

View File

@@ -1,43 +0,0 @@
version: 1
phase: executing
stop_reason: null
goal_status: "in-progress"
goal_revision: 2
current_step_id: null
iteration: 7
replan_required: false
completed_steps[5]: guardrails,"model-modules","process-modules","storage-toon-modules","app-runtime-modules"
blocked_steps[0]:
last_verification:
passed: true
summary: All commands passed
commands[3]: "cargo fmt --check","cargo test -q","cargo clippy -q --all-targets --all-features"
output[1]: "running 65 tests\n.................................................................\ntest result: ok. 65 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.01s"
last_cleanup_summary:
passed: true
summary: "Cleanup accepted for app-runtime-modules"
commands[0]:
output[3]: Kept runtime behavior stable by preserving the existing `impl App` entry points in `src/app/runtime/mod.rs` and moving only the implementation bodies behind module boundaries.,"Added focused runtime guardrail coverage in `src/app/runtime/events.rs`, `src/app/runtime/usage.rs`, and `src/app/runtime/workspace.rs` instead of expanding the catch-all app test file.","Folded in low-risk cleanup needed for a clean verification pass: collapsed the app event-poll branch in `src/app/mod.rs`, elided a needless lifetime in `src/process/parser.rs`, marked the UI-only helper in `src/ui/mod.rs` as test-only, and explicitly allowed the existing `AppEvent` enum layout instead of changing runtime payload behavior."
last_full_test_summary:
passed: true
summary: All commands passed
commands[4]: "cargo test -q app::runtime::events::tests","cargo test -q app::runtime::usage::tests","cargo test -q app::runtime::workspace::tests","cargo test -q app::tests"
output[4]: "running 3 tests\n...\ntest result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 62 filtered out; finished in 0.00s","running 2 tests\n..\ntest result: ok. 2 passed; 0 failed; 0 ignored; 0 measured; 63 filtered out; finished in 0.00s","running 1 test\n.\ntest result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 64 filtered out; finished in 0.00s","running 6 tests\n......\ntest result: ok. 6 passed; 0 failed; 0 ignored; 0 measured; 59 filtered out; finished in 0.00s"
history[5]{timestamp,kind,detail}:
"1775277691","step-complete",Completed guardrails
"1775278850","step-complete","Completed model-modules"
"1775279170","step-complete","Completed process-modules"
"1775279529","step-complete","Completed storage-toon-modules"
"1775279938","step-complete","Completed app-runtime-modules"
notes[8]: No actionable step remained and autonomous replan produced nothing.,"Recovered stale active step state for module-mosaic. Reset model-modules to todo.","Recovered stale active step state for module-mosaic. Reset model-modules to todo.",No actionable step remained and autonomous replan produced nothing.,"Recovered stale active step state for module-mosaic. Reset process-modules to todo.","Recovered stale active step state for module-mosaic. Reset storage-toon-modules to todo.","Recovered stale active step state for module-mosaic. Reset app-runtime-modules to todo.","Recovered stale active step state for module-mosaic. Reset app-workspace-input-modules to todo."
planning_session:
pending_question: null
transcript[4]{role,content}:
user,refactor large files to smaller more maintainable files
assistant,Planning completed
user,refactor large files
assistant,Planning completed
started_at: "1775275504"
last_usage_refresh_at: "1775280021"
last_usage_input_tokens: null
last_usage_output_tokens: null

View File

@@ -1,3 +0,0 @@
# Goal
Describe the goal for this controller.

View File

@@ -1,3 +0,0 @@
version: 1
goal_summary: No plan yet
steps[0]:

View File

@@ -1,5 +0,0 @@
# Standards
- Keep code maintainable.
- Avoid one-off hacks.
- Leave tests green.

View File

@@ -1,22 +0,0 @@
version: 1
phase: planning
stop_reason: null
goal_status: unknown
goal_revision: 0
current_step_id: null
iteration: 0
replan_required: false
completed_steps[0]:
blocked_steps[0]:
last_verification: null
last_cleanup_summary: null
last_full_test_summary: null
history[0]:
notes[0]:
planning_session:
pending_question: null
transcript[0]:
started_at: null
last_usage_refresh_at: null
last_usage_input_tokens: null
last_usage_output_tokens: null

View File

@@ -1,12 +0,0 @@
# Goal
Turn rough user prompts submitted to the controller goal planner into clear, production-quality improvement briefs shaped by a cross-functional software team.
The controller should:
- reinterpret ambiguous or sloppy requests through the perspectives of an architect, product owner, senior engineer, QA engineer, and other relevant software roles;
- surface missing context, risks, constraints, edge cases, and acceptance criteria before execution begins;
- rewrite the original request into a coherent codebase-improvement prompt that is specific, technically credible, and ready for autonomous planning or implementation;
- prefer maintainable, incremental improvements over novelty or one-off solutions;
- produce outputs that help downstream agents make sound architectural, implementation, testing, and rollout decisions with minimal back-and-forth.
Success means a weak initial prompt becomes a well-scoped, team-reviewed execution brief with explicit goals, assumptions, constraints, risks, and verification expectations.

View File

@@ -1,89 +0,0 @@
version: 1
goal_summary: "Define a team-oriented planning controller that transforms rough prompts into implementation-ready improvement briefs using cross-functional software perspectives."
steps[6]:
- id: "step-01"
title: Audit Current Controller Artifacts
purpose: "Inspect the existing goal, standards, plan, and state files to replace placeholders and preserve any useful structure."
notes: "The current controller files are placeholder-heavy and need concrete intent before automation can rely on them. One or more commands failed"
inputs[4]: ".agent/controllers/teamwise-prompt-lens/goal.md",".agent/controllers/teamwise-prompt-lens/standards.md",".agent/controllers/teamwise-prompt-lens/plan.toon",".agent/controllers/teamwise-prompt-lens/state.toon"
outputs[3]: Confirmed file inventory,List of placeholder content to replace,Any existing TOON structure worth preserving
dependencies[0]:
verification[1]:
- label: Read current controller files
commands[4]: "sed -n '1,200p' .agent/controllers/teamwise-prompt-lens/goal.md","sed -n '1,240p' .agent/controllers/teamwise-prompt-lens/standards.md","sed -n '1,240p' .agent/controllers/teamwise-prompt-lens/plan.toon","sed -n '1,240p' .agent/controllers/teamwise-prompt-lens/state.toon"
cleanup_requirements[1]{label,description}:
No stale placeholders,Remove generic placeholder text once the real controller intent is documented.
status: active
attempts: 5
- id: "step-02"
title: "Define Cross-Functional Prompt Lens"
purpose: "Specify the software-team roles and the exact review dimensions each role contributes to prompt improvement."
notes: The controller needs explicit personas so it consistently upgrades prompts instead of producing generic rewrites.
inputs[2]: "User request for architect, QA, senior engineer, product owner, and broader team input","Findings from step-01"
outputs[3]: Role list for the prompt lens,"Per-role review criteria",Rules for when to include or omit additional roles
dependencies[1]: "step-01"
verification[1]:
- label: Check role coverage in artifacts
commands[1]: "rg -n \"architect|product|senior engineer|qa|operations|security|performance\" .agent/controllers/teamwise-prompt-lens"
cleanup_requirements[1]{label,description}:
Avoid role sprawl,Keep the persona set opinionated and reusable rather than listing every possible specialty.
status: todo
attempts: 0
- id: "step-03"
title: Rewrite Goal And Standards
purpose: "Replace the placeholder Markdown with controller-specific guidance that matches the desired teamwise prompt transformation behavior."
notes: The goal and standards must be explicit because downstream planning quality depends on them.
inputs[2]: "Outputs from step-01","Outputs from step-02"
outputs[2]: "Updated .agent/controllers/teamwise-prompt-lens/goal.md","Updated .agent/controllers/teamwise-prompt-lens/standards.md"
dependencies[1]: "step-02"
verification[1]:
- label: Validate rewritten Markdown content
commands[3]: "sed -n '1,240p' .agent/controllers/teamwise-prompt-lens/goal.md","sed -n '1,260p' .agent/controllers/teamwise-prompt-lens/standards.md","rg -n \"Describe the goal for this controller|placeholder|TODO\" .agent/controllers/teamwise-prompt-lens/goal.md .agent/controllers/teamwise-prompt-lens/standards.md"
cleanup_requirements[1]{label,description}:
Keep standards actionable,Remove vague quality slogans unless they imply a concrete execution rule.
status: todo
attempts: 0
- id: "step-04"
title: Author Planner Workflow In TOON
purpose: "Encode the planning workflow so the controller consistently turns sloppy prompts into structured, execution-ready briefs."
notes: The main behavioral logic belongs in the plan file because the controller is operating in planning mode.
inputs[2]: Rewritten goal and standards,"Cross-functional prompt lens definition"
outputs[2]: "Updated .agent/controllers/teamwise-prompt-lens/plan.toon with ordered planning behavior","Explicit output sections for rewritten prompt, assumptions, risks, acceptance criteria, and verification"
dependencies[1]: "step-03"
verification[1]:
- label: Review plan structure
commands[2]: "sed -n '1,260p' .agent/controllers/teamwise-prompt-lens/plan.toon","rg -n \"assumptions|risks|acceptance criteria|verification|rewrite|team\" .agent/controllers/teamwise-prompt-lens/plan.toon"
cleanup_requirements[1]{label,description}:
No dead branches,Remove unused workflow branches or duplicate instructions that would confuse autonomous execution.
status: todo
attempts: 0
- id: "step-05"
title: Initialize Stateful Planning Data
purpose: "Define the minimal controller state needed to track prompt quality, assumptions, open questions, and plan readiness across runs."
notes: State should stay minimal so the controller remains predictable and maintainable.
inputs[2]: "Planner workflow from step-04","Existing .agent/controllers/teamwise-prompt-lens/state.toon"
outputs[2]: "Updated .agent/controllers/teamwise-prompt-lens/state.toon","Stable state fields for prompt intake, role synthesis, assumptions, risks, and completion status"
dependencies[1]: "step-04"
verification[1]:
- label: Inspect state schema
commands[2]: "sed -n '1,240p' .agent/controllers/teamwise-prompt-lens/state.toon","rg -n \"prompt|assumption|risk|question|ready|status\" .agent/controllers/teamwise-prompt-lens/state.toon"
cleanup_requirements[1]{label,description}:
Avoid overspecified state,Remove transient or redundant fields that do not support repeated planning runs.
status: todo
attempts: 0
- id: "step-06"
title: Validate With Representative Prompt Cases
purpose: Check that the controller can upgrade rough prompts into clearer briefs without losing user intent.
notes: A few realistic examples are the fastest way to catch missing sections or overcomplicated output rules.
inputs[2]: Updated controller artifacts,Representative sloppy prompts about codebase improvements
outputs[2]: Validation notes,"Any final wording adjustments to goal, standards, plan, or state"
dependencies[1]: "step-05"
verification[2]:
- label: Run artifact review against sample prompts
commands[4]: "sed -n '1,260p' .agent/controllers/teamwise-prompt-lens/goal.md","sed -n '1,260p' .agent/controllers/teamwise-prompt-lens/standards.md","sed -n '1,320p' .agent/controllers/teamwise-prompt-lens/plan.toon","sed -n '1,260p' .agent/controllers/teamwise-prompt-lens/state.toon"
- label: Final placeholder sweep
commands[1]: "rg -n \"TODO|placeholder|Describe the goal for this controller|TBD\" .agent/controllers/teamwise-prompt-lens"
cleanup_requirements[1]{label,description}:
Remove ad hoc examples,"Do not leave validation-only sample prompts in production controller files unless intentionally documented."
status: todo
attempts: 0

View File

@@ -1,13 +0,0 @@
# Standards
- Treat every incoming prompt as incomplete until assumptions, constraints, and success criteria are made explicit.
- Synthesize perspectives from architecture, product, engineering, QA, and operations when they materially affect the outcome.
- Optimize for maintainable codebase improvements, not clever one-off patches.
- Preserve the user's core intent while upgrading precision, scope control, and technical quality.
- Make missing information visible as assumptions or open questions instead of silently inventing product or system behavior.
- Require clear deliverables, acceptance criteria, and verification expectations in the rewritten prompt.
- Call out risks, dependencies, migration concerns, and likely regression areas when relevant.
- Keep outputs concise enough for autonomous execution, but complete enough to prevent unnecessary follow-up.
- Prefer incremental, reviewable changes that can keep tests green throughout execution.
- Eliminate placeholder language, vague directives, and non-actionable advice from controller artifacts.
- Leave tests green.

File diff suppressed because one or more lines are too long

View File

@@ -28,6 +28,12 @@ use crate::model::{
use crate::ui::{self, scroll::VerticalScrollState, SessionRenderRow, SessionView, SidebarView}; use crate::ui::{self, scroll::VerticalScrollState, SessionRenderRow, SessionView, SidebarView};
pub(crate) const USAGE_REFRESH_INTERVAL: Duration = Duration::from_secs(120); pub(crate) const USAGE_REFRESH_INTERVAL: Duration = Duration::from_secs(120);
/// Model names selectable on the create-controller form.
///
/// `App::create_model_index` indexes into this list; index 0 is the form's default selection.
pub(crate) const CREATE_MODELS: [&str; 4] = [
"gpt-5.4",
"gpt-5.4-mini",
"gpt-5.3-codex",
"gpt-5.3-codex-spark",
];
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
#[allow(clippy::large_enum_variant)] #[allow(clippy::large_enum_variant)]
@@ -86,6 +92,9 @@ pub struct App {
pub picker_items: Vec<crate::model::ControllerSummary>, pub picker_items: Vec<crate::model::ControllerSummary>,
pub picker_selected: usize, pub picker_selected: usize,
pub create_input: String, pub create_input: String,
pub create_model_index: usize,
pub create_fast_mode: bool,
pub create_allow_branching: bool,
pub create_error: Option<String>, pub create_error: Option<String>,
pub default_task_path: PathBuf, pub default_task_path: PathBuf,
pub(crate) frame_tick: u64, pub(crate) frame_tick: u64,
@@ -100,6 +109,9 @@ impl App {
picker_items: Vec::new(), picker_items: Vec::new(),
picker_selected: 0, picker_selected: 0,
create_input: String::new(), create_input: String::new(),
create_model_index: 0,
create_fast_mode: false,
create_allow_branching: false,
create_error: None, create_error: None,
default_task_path: default_task_path.clone(), default_task_path: default_task_path.clone(),
frame_tick: 0, frame_tick: 0,
@@ -144,7 +156,11 @@ impl App {
let workspace = self.workspace.as_ref()?; let workspace = self.workspace.as_ref()?;
Some(StatusSnapshot { Some(StatusSnapshot {
controller_id: workspace.task_config.controller_id(), controller_id: workspace.task_config.controller_id(),
branch: workspace.task_config.branch.clone(), branch: if workspace.state.allow_branching {
workspace.task_config.branch.clone()
} else {
"current".to_string()
},
started_at: workspace.state.started_at.clone(), started_at: workspace.state.started_at.clone(),
phase: workspace.state.phase.clone(), phase: workspace.state.phase.clone(),
iteration: workspace.state.iteration, iteration: workspace.state.iteration,
@@ -154,6 +170,25 @@ impl App {
}) })
} }
/// Returns the model name currently selected on the create-controller form.
///
/// Falls back to the first entry of `CREATE_MODELS` if `create_model_index`
/// is out of range.
pub(crate) fn create_model(&self) -> &'static str {
    match CREATE_MODELS.get(self.create_model_index) {
        Some(&selected) => selected,
        None => CREATE_MODELS[0],
    }
}
/// Moves the create-form model selection to the next entry of `CREATE_MODELS`,
/// wrapping back to the first entry after the last one.
pub(crate) fn cycle_create_model(&mut self) {
    let next_index = self.create_model_index + 1;
    self.create_model_index = next_index % CREATE_MODELS.len();
}
/// Restores every create-controller form field to its initial value:
/// empty goal text, first model, fast mode off, branching off, and no error.
pub(crate) fn reset_create_form(&mut self) {
    self.create_error = None;
    self.create_allow_branching = false;
    self.create_fast_mode = false;
    self.create_model_index = 0;
    self.create_input.clear();
}
pub(crate) fn workspace(&self) -> Option<&WorkspaceRuntime> { pub(crate) fn workspace(&self) -> Option<&WorkspaceRuntime> {
self.workspace.as_ref() self.workspace.as_ref()
} }

View File

@@ -24,13 +24,13 @@ impl App {
} }
KeyCode::Char('n') => { KeyCode::Char('n') => {
self.screen = Screen::CreateController; self.screen = Screen::CreateController;
self.create_error = None; self.reset_create_form();
Ok(false) Ok(false)
} }
KeyCode::Enter => { KeyCode::Enter => {
if self.picker_selected == self.picker_items.len() { if self.picker_selected == self.picker_items.len() {
self.screen = Screen::CreateController; self.screen = Screen::CreateController;
self.create_error = None; self.reset_create_form();
return Ok(false); return Ok(false);
} }
@@ -55,6 +55,21 @@ impl App {
self.create_error = None; self.create_error = None;
Ok(false) Ok(false)
} }
KeyCode::F(2) => {
self.cycle_create_model();
self.create_error = None;
Ok(false)
}
KeyCode::F(3) => {
self.create_fast_mode = !self.create_fast_mode;
self.create_error = None;
Ok(false)
}
KeyCode::F(4) => {
self.create_allow_branching = !self.create_allow_branching;
self.create_error = None;
Ok(false)
}
KeyCode::Backspace => { KeyCode::Backspace => {
self.create_input.pop(); self.create_input.pop();
self.create_error = None; self.create_error = None;
@@ -68,11 +83,15 @@ impl App {
return Ok(false); return Ok(false);
} }
match self.create_workspace_from_goal(goal.clone()) { let model = self.create_model().to_string();
let fast_mode = self.create_fast_mode;
let allow_branching = self.create_allow_branching;
match self.create_workspace_from_goal(goal.clone(), model, fast_mode, allow_branching)
{
Ok(()) => { Ok(()) => {
self.submit_workspace_input(goal)?; self.submit_workspace_input(goal)?;
self.create_input.clear(); self.reset_create_form();
self.create_error = None;
} }
Err(error) => { Err(error) => {
self.create_error = Some(error.to_string()); self.create_error = Some(error.to_string());

View File

@@ -193,6 +193,9 @@ mod tests {
picker_items: Vec::new(), picker_items: Vec::new(),
picker_selected: 0, picker_selected: 0,
create_input: String::new(), create_input: String::new(),
create_model_index: 0,
create_fast_mode: false,
create_allow_branching: false,
create_error: None, create_error: None,
default_task_path: PathBuf::from(DEFAULT_TASK_CONFIG_PATH), default_task_path: PathBuf::from(DEFAULT_TASK_CONFIG_PATH),
frame_tick: 0, frame_tick: 0,

View File

@@ -15,8 +15,20 @@ impl App {
workspace::open_workspace_from_task_file(self, task_path) workspace::open_workspace_from_task_file(self, task_path)
} }
pub(super) fn create_workspace_from_goal(&mut self, goal: String) -> Result<()> { pub(super) fn create_workspace_from_goal(
workspace::create_workspace_from_goal(self, goal) &mut self,
goal: String,
model: String,
fast_mode: bool,
allow_branching: bool,
) -> Result<()> {
workspace::create_workspace_from_goal_with_options(
self,
goal,
model,
fast_mode,
allow_branching,
)
} }
pub(super) fn open_workspace( pub(super) fn open_workspace(
@@ -39,6 +51,10 @@ impl App {
usage::maybe_refresh_usage(self) usage::maybe_refresh_usage(self)
} }
/// Forces a usage refresh by delegating to `usage::refresh_usage_now`.
///
/// Any error from the delegated call is returned unchanged.
pub(super) fn refresh_usage_now(&mut self) -> Result<()> {
usage::refresh_usage_now(self)
}
pub(super) fn push_local_entry( pub(super) fn push_local_entry(
&mut self, &mut self,
source: SessionSource, source: SessionSource,

View File

@@ -14,14 +14,18 @@ pub(super) fn maybe_refresh_usage(app: &mut App) -> Result<()> {
return Ok(()); return Ok(());
} }
refresh_usage_now(app)
}
pub(super) fn refresh_usage_now(app: &mut App) -> Result<()> {
let Some(workspace) = app.workspace.as_mut() else {
return Ok(());
};
let snapshot = crate::process::refresh_usage_snapshot(&workspace.state); let snapshot = crate::process::refresh_usage_snapshot(&workspace.state);
workspace.last_usage_refresh = Instant::now(); workspace.last_usage_refresh = Instant::now();
workspace.usage_snapshot = snapshot.clone(); workspace.usage_snapshot = snapshot.clone();
workspace.state.last_usage_refresh_at = snapshot.refreshed_at.clone(); crate::process::persist_usage_snapshot(&mut workspace.state, &snapshot);
workspace.state.last_usage_input_tokens = snapshot.input_tokens;
workspace.state.last_usage_output_tokens = snapshot.output_tokens;
workspace.state.last_usage_primary_window = snapshot.primary.clone();
workspace.state.last_usage_secondary_window = snapshot.secondary.clone();
toon::write_state(&workspace.task_config.state_file, &workspace.state)?; toon::write_state(&workspace.task_config.state_file, &workspace.state)?;
Ok(()) Ok(())
} }
@@ -74,6 +78,9 @@ mod tests {
picker_items: Vec::new(), picker_items: Vec::new(),
picker_selected: 0, picker_selected: 0,
create_input: String::new(), create_input: String::new(),
create_model_index: 0,
create_fast_mode: false,
create_allow_branching: false,
create_error: None, create_error: None,
default_task_path: PathBuf::from(DEFAULT_TASK_CONFIG_PATH), default_task_path: PathBuf::from(DEFAULT_TASK_CONFIG_PATH),
frame_tick: 0, frame_tick: 0,

View File

@@ -19,13 +19,24 @@ pub(super) fn open_workspace_from_task_file(app: &mut App, task_path: PathBuf) -
open_workspace(app, config, Some(task_path)) open_workspace(app, config, Some(task_path))
} }
pub(super) fn create_workspace_from_goal(app: &mut App, goal: String) -> Result<()> { pub(super) fn create_workspace_from_goal_with_options(
app: &mut App,
goal: String,
model: String,
fast_mode: bool,
allow_branching: bool,
) -> Result<()> {
let suggested_id = let suggested_id =
crate::process::generate_controller_id(&repo::repo_root(), &goal).map_err(|error| { crate::process::generate_controller_id(&repo::repo_root(), &goal).map_err(|error| {
anyhow!("Failed to generate controller id with GPT-5.4 mini: {error:#}") anyhow!("Failed to generate controller id with GPT-5.4 mini: {error:#}")
})?; })?;
let controller_id = toon::make_unique_controller_id(&suggested_id); let controller_id = toon::make_unique_controller_id(&suggested_id);
let config = toon::create_controller(&app.default_task_path, &controller_id)?; let config = toon::create_controller(&app.default_task_path, &controller_id)?;
let mut state = toon::read_state(&config.state_file)?;
state.run_model = model;
state.fast_mode = fast_mode;
state.allow_branching = allow_branching;
toon::write_state(&config.state_file, &state)?;
open_workspace(app, config, Some(app.default_task_path.clone())) open_workspace(app, config, Some(app.default_task_path.clone()))
} }
@@ -82,6 +93,7 @@ pub(super) fn open_workspace(
session_drag_active: false, session_drag_active: false,
}); });
app.screen = Screen::Workspace; app.screen = Screen::Workspace;
app.refresh_usage_now()?;
refresh_picker(app)?; refresh_picker(app)?;
Ok(()) Ok(())
} }
@@ -171,6 +183,9 @@ mod tests {
picker_items: Vec::new(), picker_items: Vec::new(),
picker_selected: 3, picker_selected: 3,
create_input: String::new(), create_input: String::new(),
create_model_index: 0,
create_fast_mode: false,
create_allow_branching: false,
create_error: None, create_error: None,
default_task_path, default_task_path,
frame_tick: 0, frame_tick: 0,

View File

@@ -35,6 +35,9 @@ fn sample_app_with_control_rx() -> (App, Receiver<ControlCommand>) {
}], }],
picker_selected: 0, picker_selected: 0,
create_input: String::new(), create_input: String::new(),
create_model_index: 0,
create_fast_mode: false,
create_allow_branching: false,
create_error: None, create_error: None,
default_task_path: PathBuf::from(DEFAULT_TASK_CONFIG_PATH), default_task_path: PathBuf::from(DEFAULT_TASK_CONFIG_PATH),
frame_tick: 0, frame_tick: 0,
@@ -96,6 +99,24 @@ fn planning_mode_blocks_slash_commands() {
assert!(last.body.contains("Slash commands")); assert!(last.body.contains("Slash commands"));
} }
#[test]
fn create_screen_shortcuts_update_run_options() {
    let mut app = sample_app();
    app.workspace = None;
    app.screen = Screen::CreateController;

    // Each shortcut is pressed once, in order: F2 cycles the model,
    // F3 toggles fast mode, F4 toggles branching.
    let shortcuts = [
        (KeyCode::F(2), "cycle model"),
        (KeyCode::F(3), "toggle fast"),
        (KeyCode::F(4), "toggle branching"),
    ];
    for (key, failure_message) in shortcuts {
        app.handle_create_key(KeyEvent::from(key))
            .expect(failure_message);
    }

    // One F2 press from index 0 selects the second model in the list.
    assert_eq!(app.create_model(), "gpt-5.4-mini");
    assert!(app.create_fast_mode);
    assert!(app.create_allow_branching);
}
#[test] #[test]
fn status_command_reports_current_workspace_progress() { fn status_command_reports_current_workspace_progress() {
let mut app = sample_app(); let mut app = sample_app();

View File

@@ -35,6 +35,7 @@ pub fn runtime_loop(
} }
let goal_md = toon::read_markdown(&config.goal_file)?; let goal_md = toon::read_markdown(&config.goal_file)?;
let standards_md = toon::read_markdown(&config.standards_file)?; let standards_md = toon::read_markdown(&config.standards_file)?;
refresh_usage_state(&mut state);
emit_snapshot(&event_tx, &goal_md, &standards_md, &plan, &state); emit_snapshot(&event_tx, &goal_md, &standards_md, &plan, &state);
match control_rx.try_recv() { match control_rx.try_recv() {
@@ -186,11 +187,12 @@ pub fn runtime_loop(
plan.mark_active(&step.id); plan.mark_active(&step.id);
state.current_step_id = Some(step.id.clone()); state.current_step_id = Some(step.id.clone());
state.iteration += 1; state.iteration += 1;
refresh_usage_state(&mut state);
toon::write_plan(&config.plan_file, &plan)?; toon::write_plan(&config.plan_file, &plan)?;
toon::write_state(&config.state_file, &state)?; toon::write_state(&config.state_file, &state)?;
emit_snapshot(&event_tx, &goal_md, &standards_md, &plan, &state); emit_snapshot(&event_tx, &goal_md, &standards_md, &plan, &state);
let exec = executor::implement(&repo_root, &config, &plan, &step, &event_tx)?; let exec = executor::implement(&repo_root, &config, &state, &plan, &step, &event_tx)?;
if goal_checker::needs_goal_clarification(&exec) { if goal_checker::needs_goal_clarification(&exec) {
state.phase = ControllerPhase::Planning; state.phase = ControllerPhase::Planning;
state.set_stop_reason(format!( state.set_stop_reason(format!(
@@ -253,6 +255,11 @@ pub fn runtime_loop(
Ok(()) Ok(())
} }
/// Takes a fresh usage snapshot for `state` and records it back onto `state`
/// through `persist_usage_snapshot`.
///
/// This function does not write the state file itself; callers that need the
/// update on disk write the state separately.
fn refresh_usage_state(state: &mut crate::model::ControllerState) {
    let latest_usage = crate::process::refresh_usage_snapshot(state);
    crate::process::persist_usage_snapshot(state, &latest_usage);
}
fn emit_snapshot( fn emit_snapshot(
event_tx: &Sender<AppEvent>, event_tx: &Sender<AppEvent>,
goal_md: &str, goal_md: &str,
@@ -291,12 +298,25 @@ fn recover_stale_execution_state(
state: &mut crate::model::ControllerState, state: &mut crate::model::ControllerState,
event_tx: &Sender<AppEvent>, event_tx: &Sender<AppEvent>,
) -> Result<bool> { ) -> Result<bool> {
if state.current_step_id.is_some() { let current_step_id = state.current_step_id.clone();
let has_stale_current_step = if let Some(current_step_id) = &current_step_id {
!plan.steps.iter().any(|step| {
step.id == *current_step_id
&& matches!(
step.status,
StepStatus::Todo | StepStatus::Active | StepStatus::Blocked
)
})
} else {
false
};
if !has_stale_current_step && state.current_step_id.is_some() {
return Ok(false); return Ok(false);
} }
let active_steps = plan.active_step_ids(); let active_steps = plan.active_step_ids();
if active_steps.is_empty() { if !has_stale_current_step && active_steps.is_empty() {
return Ok(false); return Ok(false);
} }
@@ -312,11 +332,28 @@ fn recover_stale_execution_state(
state.goal_status = GoalStatus::InProgress; state.goal_status = GoalStatus::InProgress;
state.clear_stop_reason(); state.clear_stop_reason();
state.replan_required = false; state.replan_required = false;
let reason = format!( state.current_step_id = None;
let reason = if has_stale_current_step && !active_steps.is_empty() {
format!(
"Recovered stale execution state for {}. Cleared current_step_id {}. Reset {} to todo.",
config.controller_id(),
current_step_id.unwrap_or_default(),
active_steps.join(", ")
)
} else if has_stale_current_step {
format!(
"Recovered stale execution state for {}. Cleared current_step_id {}.",
config.controller_id(),
current_step_id.unwrap_or_default()
)
} else {
format!(
"Recovered stale active step state for {}. Reset {} to todo.", "Recovered stale active step state for {}. Reset {} to todo.",
config.controller_id(), config.controller_id(),
active_steps.join(", ") active_steps.join(", ")
); )
};
state.notes.push(reason.clone()); state.notes.push(reason.clone());
toon::write_plan(&config.plan_file, plan)?; toon::write_plan(&config.plan_file, plan)?;
toon::write_state(&config.state_file, state)?; toon::write_state(&config.state_file, state)?;
@@ -389,6 +426,53 @@ mod tests {
} }
} }
/// A `current_step_id` that points at an already finished step must be treated as stale:
/// recovery clears it, moves the controller back to executing, and emits a session notice.
#[test]
fn recovers_stale_current_step_reference() {
    // Point every controller file at a throwaway directory.
    let temp = tempdir().expect("tempdir");
    let mut config = TaskConfig::default_for("stale-current");
    let controller_dir = temp.path().join(".agent/controllers/stale-current");
    config.goal_file = controller_dir.join("goal.md");
    config.plan_file = controller_dir.join("plan.toon");
    config.state_file = controller_dir.join("state.toon");
    config.standards_file = controller_dir.join("standards.md");

    // The only step in the plan is already done, yet the state still names it as current.
    let finished_step = PlanStep {
        id: "s1".to_string(),
        title: "Scope".to_string(),
        status: StepStatus::Done,
        ..PlanStep::default()
    };
    let mut plan = Plan {
        version: 1,
        goal_summary: "goal".to_string(),
        steps: vec![finished_step],
    };
    let mut state = ControllerState {
        phase: ControllerPhase::Blocked,
        goal_status: GoalStatus::Blocked,
        current_step_id: Some("s1".to_string()),
        ..ControllerState::default()
    };
    toon::ensure_controller_files(&config).expect("ensure files");

    let (event_tx, event_rx) = mpsc::channel();
    let recovered = recover_stale_execution_state(&config, &mut plan, &mut state, &event_tx)
        .expect("recover");

    assert!(recovered);
    assert!(state.current_step_id.is_none());
    assert!(matches!(state.phase, ControllerPhase::Executing));
    assert!(matches!(state.goal_status, GoalStatus::InProgress));
    assert!(state.stop_reason.is_none());

    // The recovery notice must mention which step id was cleared.
    match event_rx.recv().expect("notice event") {
        AppEvent::Session(entry) => {
            assert!(entry.body.contains("Cleared current_step_id s1"));
        }
        other => panic!("unexpected event: {other:?}"),
    }
}
#[test] #[test]
fn resumable_step_prefers_current_blocked_or_active_step() { fn resumable_step_prefers_current_blocked_or_active_step() {
let plan = Plan { let plan = Plan {

View File

@@ -4,7 +4,7 @@ use anyhow::Result;
use serde_json::{json, Value}; use serde_json::{json, Value};
use crate::app::AppEvent; use crate::app::AppEvent;
use crate::model::{ExecutionResponse, Plan, PlanStep, SessionSource, TaskConfig}; use crate::model::{ControllerState, ExecutionResponse, Plan, PlanStep, SessionSource, TaskConfig};
use crate::process; use crate::process;
use crate::prompt; use crate::prompt;
use crate::storage::toon; use crate::storage::toon;
@@ -12,6 +12,7 @@ use crate::storage::toon;
pub fn implement( pub fn implement(
repo_root: &std::path::Path, repo_root: &std::path::Path,
config: &TaskConfig, config: &TaskConfig,
state: &ControllerState,
plan: &Plan, plan: &Plan,
step: &PlanStep, step: &PlanStep,
event_tx: &Sender<AppEvent>, event_tx: &Sender<AppEvent>,
@@ -31,11 +32,19 @@ pub fn implement(
"- Keep output terse. Use short summaries and short notes.\n", "- Keep output terse. Use short summaries and short notes.\n",
"- If the requested change is already present, return done.\n", "- If the requested change is already present, return done.\n",
"- If the goal is genuinely ambiguous, set needs_goal_clarification=true.\n\n", "- If the goal is genuinely ambiguous, set needs_goal_clarification=true.\n\n",
"Branching:\n{branching}\n\n",
"Run mode:\n{run_mode}\n\n",
"Return empty arrays for verification_commands, test_commands, or notes when not needed.\n\n", "Return empty arrays for verification_commands, test_commands, or notes when not needed.\n\n",
"Goal summary:\n{goal}\n\n", "Goal summary:\n{goal}\n\n",
"Standards summary:\n{standards}\n\n", "Standards summary:\n{standards}\n\n",
"Execution context:\n{context}\n" "Execution context:\n{context}\n"
), ),
branching = branching_instruction(config, state),
run_mode = if state.fast_mode {
"fast mode enabled; favor the narrowest sufficient inspection and verification"
} else {
"normal mode"
},
goal = prompt::compact_markdown(&goal_md, 8, 1200), goal = prompt::compact_markdown(&goal_md, 8, 1200),
standards = prompt::compact_markdown(&standards_md, 10, 1200), standards = prompt::compact_markdown(&standards_md, 10, 1200),
context = serde_json::to_string_pretty(&context)?, context = serde_json::to_string_pretty(&context)?,
@@ -58,6 +67,7 @@ pub fn implement(
repo_root, repo_root,
&prompt, &prompt,
&schema, &schema,
state.run_model(),
event_tx, event_tx,
SessionSource::Executor, SessionSource::Executor,
Some(step.id.clone()), Some(step.id.clone()),
@@ -65,6 +75,17 @@ pub fn implement(
Ok(serde_json::from_str(&raw)?) Ok(serde_json::from_str(&raw)?)
} }
/// Builds the text for the "Branching" section of the executor prompt.
///
/// When `state.allow_branching` is false the instruction forbids any branch manipulation;
/// otherwise it permits branching only when necessary and names `config.branch` as the
/// preferred branch.
fn branching_instruction(config: &TaskConfig, state: &ControllerState) -> String {
    if !state.allow_branching {
        return "do not create, switch, or rename git branches; stay on the current branch"
            .to_string();
    }

    format!(
        "branch creation or switching is allowed only when necessary; preferred branch is {}",
        config.branch
    )
}
fn build_execution_context(plan: &Plan, step: &PlanStep) -> Value { fn build_execution_context(plan: &Plan, step: &PlanStep) -> Value {
let dependency_steps = step let dependency_steps = step
.dependencies .dependencies

View File

@@ -31,12 +31,25 @@ pub fn refine_without_user_input(
"Use step_updates only for new or changed steps.\n", "Use step_updates only for new or changed steps.\n",
"Use remove_step_ids only for steps that should be deleted.\n", "Use remove_step_ids only for steps that should be deleted.\n",
"Use pending_step_order only when pending-step order should change; otherwise return an empty array.\n", "Use pending_step_order only when pending-step order should change; otherwise return an empty array.\n",
"Do not propose branch creation or branch switching unless branching is explicitly allowed below.\n",
"Return only the delta object.\n\n", "Return only the delta object.\n\n",
"Branching:\n{branching}\n\n",
"Run mode:\n{run_mode}\n\n",
"Goal summary:\n{goal}\n\n", "Goal summary:\n{goal}\n\n",
"Standards summary:\n{standards}\n\n", "Standards summary:\n{standards}\n\n",
"Current plan context:\n{plan}\n\n", "Current plan context:\n{plan}\n\n",
"Current state:\n{state}\n" "Current state:\n{state}\n"
), ),
branching = if state.allow_branching {
format!("branching allowed if clearly helpful; preferred branch is {}", config.branch)
} else {
"branching disabled; stay on the current branch".to_string()
},
run_mode = if state.fast_mode {
"fast mode enabled; prefer fewer, broader steps and minimal delta output"
} else {
"normal mode"
},
goal = prompt::compact_markdown(&goal_md, 8, 1200), goal = prompt::compact_markdown(&goal_md, 8, 1200),
standards = prompt::compact_markdown(&standards_md, 10, 1200), standards = prompt::compact_markdown(&standards_md, 10, 1200),
plan = serde_json::to_string_pretty(&build_replan_context(plan, state))?, plan = serde_json::to_string_pretty(&build_replan_context(plan, state))?,
@@ -47,6 +60,7 @@ pub fn refine_without_user_input(
repo_root, repo_root,
&prompt, &prompt,
&schema, &schema,
state.run_model(),
event_tx, event_tx,
SessionSource::Planner, SessionSource::Planner,
Some(config.controller_id()), Some(config.controller_id()),

View File

@@ -1,7 +1,11 @@
use std::collections::BTreeMap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use super::{PlanStep, UsageWindow}; use super::{PlanStep, UsageWindow};
/// Model name used when a controller state carries no usable `run_model` value: it is the
/// serde default for the field and the fallback of `ControllerState::run_model()` when the
/// stored name is blank.
pub const DEFAULT_RUN_MODEL: &str = "gpt-5.4";
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Screen { pub enum Screen {
ControllerPicker, ControllerPicker,
@@ -120,6 +124,10 @@ pub struct ControllerState {
pub last_full_test_summary: Option<TestSummary>, pub last_full_test_summary: Option<TestSummary>,
pub history: Vec<HistoryEvent>, pub history: Vec<HistoryEvent>,
pub notes: Vec<String>, pub notes: Vec<String>,
#[serde(default)]
pub planning_rejection_counters: BTreeMap<String, u32>,
#[serde(default)]
pub planning_annotation_counters: BTreeMap<String, u32>,
pub planning_session: PlanningSessionMeta, pub planning_session: PlanningSessionMeta,
pub started_at: Option<String>, pub started_at: Option<String>,
pub last_usage_refresh_at: Option<String>, pub last_usage_refresh_at: Option<String>,
@@ -127,6 +135,16 @@ pub struct ControllerState {
pub last_usage_output_tokens: Option<u64>, pub last_usage_output_tokens: Option<u64>,
pub last_usage_primary_window: Option<UsageWindow>, pub last_usage_primary_window: Option<UsageWindow>,
pub last_usage_secondary_window: Option<UsageWindow>, pub last_usage_secondary_window: Option<UsageWindow>,
#[serde(default = "default_run_model")]
pub run_model: String,
#[serde(default)]
pub fast_mode: bool,
#[serde(default)]
pub allow_branching: bool,
}
fn default_run_model() -> String {
DEFAULT_RUN_MODEL.to_string()
} }
impl Default for ControllerState { impl Default for ControllerState {
@@ -147,6 +165,8 @@ impl Default for ControllerState {
last_full_test_summary: None, last_full_test_summary: None,
history: Vec::new(), history: Vec::new(),
notes: Vec::new(), notes: Vec::new(),
planning_rejection_counters: BTreeMap::new(),
planning_annotation_counters: BTreeMap::new(),
planning_session: PlanningSessionMeta::default(), planning_session: PlanningSessionMeta::default(),
started_at: None, started_at: None,
last_usage_refresh_at: None, last_usage_refresh_at: None,
@@ -154,6 +174,9 @@ impl Default for ControllerState {
last_usage_output_tokens: None, last_usage_output_tokens: None,
last_usage_primary_window: None, last_usage_primary_window: None,
last_usage_secondary_window: None, last_usage_secondary_window: None,
run_model: default_run_model(),
fast_mode: false,
allow_branching: false,
} }
} }
} }
@@ -171,6 +194,15 @@ impl ControllerState {
self.stop_reason = None; self.stop_reason = None;
} }
/// Returns the model name to run with.
///
/// The stored `run_model` is trimmed of surrounding whitespace; if nothing remains,
/// `DEFAULT_RUN_MODEL` is returned instead.
pub fn run_model(&self) -> &str {
    match self.run_model.trim() {
        "" => DEFAULT_RUN_MODEL,
        configured => configured,
    }
}
pub fn latest_notice(&self) -> Option<String> { pub fn latest_notice(&self) -> Option<String> {
self.stop_reason self.stop_reason
.clone() .clone()
@@ -205,6 +237,32 @@ impl ControllerState {
}) })
} }
/// Builds the map key used by the planning guardrail counters: `stage` and `failure_type`
/// joined by a single `:`.
pub fn planning_guardrail_counter_key(stage: &str, failure_type: &str) -> String {
    [stage, failure_type].join(":")
}
/// Bumps the rejection counter for the given `stage` / `failure_type` pair and returns the
/// new count.
///
/// A missing entry starts at zero; the increment saturates at `u32::MAX` instead of
/// overflowing.
pub fn increment_planning_rejection_counter(
    &mut self,
    stage: &str,
    failure_type: &str,
) -> u32 {
    let counter = self
        .planning_rejection_counters
        .entry(Self::planning_guardrail_counter_key(stage, failure_type))
        .or_default();
    *counter = counter.saturating_add(1);
    *counter
}
/// Bumps the annotation counter for the given `stage` / `failure_type` pair and returns the
/// new count.
///
/// A missing entry starts at zero; the increment saturates at `u32::MAX` instead of
/// overflowing.
pub fn increment_planning_annotation_counter(
    &mut self,
    stage: &str,
    failure_type: &str,
) -> u32 {
    let counter = self
        .planning_annotation_counters
        .entry(Self::planning_guardrail_counter_key(stage, failure_type))
        .or_default();
    *counter = counter.saturating_add(1);
    *counter
}
pub fn phase_notice(&self) -> Option<String> { pub fn phase_notice(&self) -> Option<String> {
match self.phase { match self.phase {
ControllerPhase::Blocked => Some( ControllerPhase::Blocked => Some(

View File

@@ -9,6 +9,7 @@ mod usage;
pub use self::controller::{ pub use self::controller::{
CleanupSummary, CommandSummary, ControllerPhase, ControllerState, GoalStatus, HistoryEvent, CleanupSummary, CommandSummary, ControllerPhase, ControllerState, GoalStatus, HistoryEvent,
PlanningSessionMeta, PlanningTurn, Screen, StepStatus, TestSummary, VerificationSummary, PlanningSessionMeta, PlanningTurn, Screen, StepStatus, TestSummary, VerificationSummary,
DEFAULT_RUN_MODEL,
}; };
#[allow(unused_imports)] #[allow(unused_imports)]
pub use self::plan::{ pub use self::plan::{
@@ -17,7 +18,16 @@ pub use self::plan::{
#[allow(unused_imports)] #[allow(unused_imports)]
pub use self::response::{ControllerSummary, ExecutionResponse, PlannerResponse}; pub use self::response::{ControllerSummary, ExecutionResponse, PlannerResponse};
#[allow(unused_imports)] #[allow(unused_imports)]
pub use self::response::{
GOAL_PLANNING_CONTRACT_VERSION, LegacyOutputProjection, PLANNING_QUALITY_GATE_VERSION,
PlanningConflictRule, PlanningConflictStrategy, PlanningContract, PlanningQualityDecisionCode,
PlanningQualityGate, PlanningPersona, PlanningPersonaEvidence, PlanningPersonaPass,
LEGACY_GOAL_PLANNING_CONTRACT_VERSION,
};
#[allow(unused_imports)]
pub use self::schema::{ pub use self::schema::{
planner_contract_schema, planning_conflict_rule_schema, planning_conflict_strategy_schema,
planning_contract_schema, planning_persona_pass_schema, planning_persona_schema,
cleanup_rule_schema, plan_delta_schema, plan_schema, plan_step_schema, cleanup_rule_schema, plan_delta_schema, plan_schema, plan_step_schema,
verification_check_schema, verification_check_schema,
}; };

View File

@@ -2,6 +2,135 @@ use serde::{Deserialize, Serialize};
use super::{ControllerPhase, Plan}; use super::{ControllerPhase, Plan};
/// Contract version written by `PlanningContract::default()`.
pub const GOAL_PLANNING_CONTRACT_VERSION: u32 = 1;
/// Contract version assumed for a `PlannerResponse` that omits `planning_contract_version`
/// (it is the value returned by `default_planning_contract_version`).
pub const LEGACY_GOAL_PLANNING_CONTRACT_VERSION: u32 = 0;
/// Quality gate version written by `PlanningQualityGate::default()`.
pub const PLANNING_QUALITY_GATE_VERSION: u32 = 1;
/// Serde default for `PlannerResponse::planning_contract_version`: a response that omits the
/// field is given the legacy contract version.
fn default_planning_contract_version() -> u32 {
LEGACY_GOAL_PLANNING_CONTRACT_VERSION
}
fn default_planning_quality_gate() -> PlanningQualityGate {
PlanningQualityGate::default()
}
/// The personas that take part in planning.
///
/// Serialized in kebab-case (`product-owner`, `senior-engineer`, `senior-maintainer`).
/// `PlanningContract::default()` lists them in the order they are declared here.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
pub enum PlanningPersona {
ProductOwner,
SeniorEngineer,
SeniorMaintainer,
}
/// Strategy attached to a field by a `PlanningConflictRule`.
///
/// Serialized in kebab-case (`latest-stage-wins`, `append-unique`, `replace`).
// NOTE(review): presumably these describe how outputs of successive persona stages are
// merged; the merge logic itself is not visible in this file — confirm before relying on it.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
pub enum PlanningConflictStrategy {
LatestStageWins,
AppendUnique,
Replace,
}
/// Associates one named output field with the conflict strategy to use for it.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct PlanningConflictRule {
/// Name of the field the rule applies to (the default contract uses names such as
/// `goal_md`, `plan`, or `risks`).
pub field: String,
/// Strategy selected for that field.
pub strategy: PlanningConflictStrategy,
}
/// Evidence attached to a persona pass, split into three free-text lists.
///
/// None of the fields has a serde default, so all three must be present when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct PlanningPersonaEvidence {
pub facts: Vec<String>,
pub assumptions: Vec<String>,
pub questions: Vec<String>,
}
/// The structured record of one persona's planning pass.
///
/// None of the fields has a serde default, so every field must be present when
/// deserializing.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct PlanningPersonaPass {
/// Persona this pass belongs to.
pub persona: PlanningPersona,
pub intent: String,
pub constraints: Vec<String>,
pub risks: Vec<String>,
pub acceptance_criteria: Vec<String>,
/// Facts, assumptions, and questions recorded with the pass.
pub evidence: PlanningPersonaEvidence,
}
/// Describes a planning contract: its version, the ordered list of personas, and the
/// per-field conflict rules.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct PlanningContract {
/// Version number of the contract; the `Default` impl uses
/// `GOAL_PLANNING_CONTRACT_VERSION`.
pub contract_version: u32,
/// Personas in their contract order.
pub ordered_personas: Vec<PlanningPersona>,
/// One rule per field that has a conflict strategy.
pub conflict_rules: Vec<PlanningConflictRule>,
}
/// Names one persona for each of the three planner outputs (`goal_md`, `standards_md`,
/// `plan`).
// NOTE(review): presumably each field records which persona stage supplies that output when
// a multi-pass result is projected onto the single-pass response shape — confirm against the
// planning pipeline, which is not visible here.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct LegacyOutputProjection {
pub goal_md_stage: PlanningPersona,
pub standards_md_stage: PlanningPersona,
pub plan_stage: PlanningPersona,
}
impl Default for PlanningContract {
fn default() -> Self {
Self {
contract_version: GOAL_PLANNING_CONTRACT_VERSION,
ordered_personas: vec![
PlanningPersona::ProductOwner,
PlanningPersona::SeniorEngineer,
PlanningPersona::SeniorMaintainer,
],
conflict_rules: vec![
PlanningConflictRule {
field: "goal_md".to_string(),
strategy: PlanningConflictStrategy::LatestStageWins,
},
PlanningConflictRule {
field: "standards_md".to_string(),
strategy: PlanningConflictStrategy::LatestStageWins,
},
PlanningConflictRule {
field: "plan".to_string(),
strategy: PlanningConflictStrategy::Replace,
},
PlanningConflictRule {
field: "constraints".to_string(),
strategy: PlanningConflictStrategy::AppendUnique,
},
PlanningConflictRule {
field: "risks".to_string(),
strategy: PlanningConflictStrategy::AppendUnique,
},
],
}
}
}
/// Decision recorded by a `PlanningQualityGate`.
///
/// Serialized in kebab-case (`accept`, `downgraded`, `blocked`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
pub enum PlanningQualityDecisionCode {
Accept,
Downgraded,
Blocked,
}
/// Quality gate result carried on a planner response: a version, a decision code, and the
/// rationale behind the decision.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct PlanningQualityGate {
/// Version of the quality gate; the `Default` impl uses `PLANNING_QUALITY_GATE_VERSION`.
pub quality_gate_version: u32,
pub decision_code: PlanningQualityDecisionCode,
/// Rationale codes; an absent field deserializes to an empty list.
#[serde(default)]
pub rationale_codes: Vec<String>,
/// Rationale texts; an absent field deserializes to an empty list.
#[serde(default)]
pub rationale: Vec<String>,
}
impl Default for PlanningQualityGate {
fn default() -> Self {
Self {
quality_gate_version: PLANNING_QUALITY_GATE_VERSION,
decision_code: PlanningQualityDecisionCode::Accept,
rationale_codes: Vec::new(),
rationale: Vec::new(),
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PlannerResponse { pub struct PlannerResponse {
pub kind: String, pub kind: String,
@@ -9,6 +138,16 @@ pub struct PlannerResponse {
pub goal_md: Option<String>, pub goal_md: Option<String>,
pub standards_md: Option<String>, pub standards_md: Option<String>,
pub plan: Option<Plan>, pub plan: Option<Plan>,
#[serde(default = "default_planning_contract_version")]
pub planning_contract_version: u32,
#[serde(default)]
pub contract: Option<PlanningContract>,
#[serde(default)]
pub persona_passes: Vec<PlanningPersonaPass>,
#[serde(default)]
pub single_pass_projection: Option<LegacyOutputProjection>,
#[serde(default = "default_planning_quality_gate")]
pub quality_gate: PlanningQualityGate,
} }
#[derive(Debug, Clone, Serialize, Deserialize, Default)] #[derive(Debug, Clone, Serialize, Deserialize, Default)]

View File

@@ -15,6 +15,171 @@ pub fn verification_check_schema() -> Value {
}) })
} }
/// JSON schema for a planning persona: a string restricted to the three kebab-case persona
/// names.
pub fn planning_persona_schema() -> Value {
    let personas = ["product-owner", "senior-engineer", "senior-maintainer"];
    json!({
        "type": "string",
        "enum": personas
    })
}
/// JSON schema for a quality gate decision code: a string restricted to `accept`,
/// `downgraded`, or `blocked`.
pub fn planning_quality_decision_schema() -> Value {
    let decision_codes = ["accept", "downgraded", "blocked"];
    json!({
        "type": "string",
        "enum": decision_codes
    })
}
/// JSON schema for a quality gate object.
///
/// All four properties are required and no additional properties are allowed.
pub fn planning_quality_gate_schema() -> Value {
    // Both rationale fields share the same "array of strings" shape.
    let string_list = json!({ "type": "array", "items": { "type": "string" } });

    json!({
        "type": "object",
        "additionalProperties": false,
        "required": ["quality_gate_version", "decision_code", "rationale_codes", "rationale"],
        "properties": {
            "quality_gate_version": { "type": "integer" },
            "decision_code": planning_quality_decision_schema(),
            "rationale_codes": string_list.clone(),
            "rationale": string_list
        }
    })
}
/// JSON schema for a conflict strategy: a string restricted to `latest-stage-wins`,
/// `append-unique`, or `replace`.
pub fn planning_conflict_strategy_schema() -> Value {
    let strategies = ["latest-stage-wins", "append-unique", "replace"];
    json!({
        "type": "string",
        "enum": strategies
    })
}
pub fn planning_conflict_rule_schema() -> Value {
json!({
"type": "object",
"additionalProperties": false,
"required": ["field", "strategy"],
"properties": {
"field": { "type": "string" },
"strategy": planning_conflict_strategy_schema()
}
})
}
/// JSON schema for persona evidence: three required string arrays (`facts`, `assumptions`,
/// `questions`) and no additional properties.
pub fn planning_persona_evidence_schema() -> Value {
    // Every evidence field is an array of strings.
    let string_list = json!({ "type": "array", "items": { "type": "string" } });

    json!({
        "type": "object",
        "additionalProperties": false,
        "required": ["facts", "assumptions", "questions"],
        "properties": {
            "facts": string_list.clone(),
            "assumptions": string_list.clone(),
            "questions": string_list
        }
    })
}
/// JSON schema for one persona pass.
///
/// All six properties are required and no additional properties are allowed.
pub fn planning_persona_pass_schema() -> Value {
    // `constraints`, `risks`, and `acceptance_criteria` are all arrays of strings.
    let string_list = json!({ "type": "array", "items": { "type": "string" } });

    json!({
        "type": "object",
        "additionalProperties": false,
        "required": [
            "persona",
            "intent",
            "constraints",
            "risks",
            "acceptance_criteria",
            "evidence"
        ],
        "properties": {
            "persona": planning_persona_schema(),
            "intent": { "type": "string" },
            "constraints": string_list.clone(),
            "risks": string_list.clone(),
            "acceptance_criteria": string_list,
            "evidence": planning_persona_evidence_schema()
        }
    })
}
/// JSON schema for a planning contract object.
///
/// `ordered_personas` must contain exactly three personas; `conflict_rules` is an array of
/// conflict rule objects of any length. All three properties are required.
pub fn planning_contract_schema() -> Value {
    let ordered_personas = json!({
        "type": "array",
        "items": planning_persona_schema(),
        "minItems": 3,
        "maxItems": 3
    });
    let conflict_rules = json!({
        "type": "array",
        "items": planning_conflict_rule_schema()
    });

    json!({
        "type": "object",
        "additionalProperties": false,
        "required": ["contract_version", "ordered_personas", "conflict_rules"],
        "properties": {
            "contract_version": { "type": "integer" },
            "ordered_personas": ordered_personas,
            "conflict_rules": conflict_rules
        }
    })
}
pub fn legacy_output_projection_schema() -> Value {
json!({
"type": "object",
"additionalProperties": false,
"required": ["goal_md_stage", "standards_md_stage", "plan_stage"],
"properties": {
"goal_md_stage": planning_persona_schema(),
"standards_md_stage": planning_persona_schema(),
"plan_stage": planning_persona_schema()
}
})
}
/// JSON schema for the full planner response under the planning contract.
///
/// `question`, `goal_md`, and `standards_md` accept a string or `null`; `plan`,
/// `quality_gate`, and `single_pass_projection` accept their object schema or `null`.
/// `contract` and `persona_passes` are not nullable. Note that `quality_gate` is described
/// under `properties` but is not listed in `required`.
pub fn planner_contract_schema() -> Value {
    // Wraps a schema so that JSON `null` is accepted as an alternative.
    let nullable = |schema: Value| json!({ "anyOf": [schema, { "type": "null" }] });
    let optional_text = json!({ "type": ["string", "null"] });

    json!({
        "type": "object",
        "additionalProperties": false,
        "required": [
            "kind",
            "question",
            "goal_md",
            "standards_md",
            "plan",
            "planning_contract_version",
            "contract",
            "persona_passes",
            "single_pass_projection"
        ],
        "properties": {
            "kind": { "type": "string", "enum": ["question", "final"] },
            "question": optional_text.clone(),
            "goal_md": optional_text.clone(),
            "standards_md": optional_text,
            "plan": nullable(plan_schema()),
            "planning_contract_version": { "type": "integer" },
            "contract": planning_contract_schema(),
            "persona_passes": {
                "type": "array",
                "items": planning_persona_pass_schema()
            },
            "quality_gate": nullable(planning_quality_gate_schema()),
            "single_pass_projection": nullable(legacy_output_projection_schema())
        }
    })
}
pub fn cleanup_rule_schema() -> Value { pub fn cleanup_rule_schema() -> Value {
json!({ json!({
"type": "object", "type": "object",
@@ -158,4 +323,18 @@ mod tests {
false false
); );
} }
/// Pins the required-field lists of the planner contract schema and checks that the
/// `quality_gate` property is present as an object.
#[test]
fn planner_contract_schema_carries_three_pass_contract_fields() {
    let schema = planner_contract_schema();
    let properties = &schema["properties"];

    let expected_top_level = json!([
        "kind",
        "question",
        "goal_md",
        "standards_md",
        "plan",
        "planning_contract_version",
        "contract",
        "persona_passes",
        "single_pass_projection"
    ]);
    assert_eq!(schema["required"], expected_top_level);

    let expected_contract = json!(["contract_version", "ordered_personas", "conflict_rules"]);
    assert_eq!(properties["contract"]["required"], expected_contract);

    assert!(properties["quality_gate"].is_object());
}
} }

File diff suppressed because it is too large Load Diff

View File

@@ -3,8 +3,7 @@ use std::sync::mpsc::Sender;
use anyhow::Result; use anyhow::Result;
use crate::app::AppEvent; use crate::app::AppEvent;
use crate::model::{ControllerPhase, PlannerResponse, PlanningTurn, SessionSource, TaskConfig}; use crate::model::{ControllerPhase, PlannerResponse, PlanningTurn, TaskConfig};
use crate::process;
use crate::storage::toon; use crate::storage::toon;
pub fn advance( pub fn advance(
@@ -16,6 +15,7 @@ pub fn advance(
let mut state = toon::read_state(&config.state_file)?; let mut state = toon::read_state(&config.state_file)?;
let goal_md = toon::read_markdown(&config.goal_file)?; let goal_md = toon::read_markdown(&config.goal_file)?;
let standards_md = toon::read_markdown(&config.standards_file)?; let standards_md = toon::read_markdown(&config.standards_file)?;
let plan = toon::read_plan(&config.plan_file)?;
state.phase = ControllerPhase::Planning; state.phase = ControllerPhase::Planning;
state.clear_stop_reason(); state.clear_stop_reason();
@@ -25,22 +25,16 @@ pub fn advance(
}); });
toon::write_state(&config.state_file, &state)?; toon::write_state(&config.state_file, &state)?;
let prompt = crate::planning::forwarder::build_planning_prompt( let response = crate::planning::forwarder::run_planning_pipeline(
repo_root,
config, config,
&mut state,
&goal_md, &goal_md,
&standards_md, &standards_md,
&state, &plan,
latest_user_input, latest_user_input,
);
let raw = process::run_codex_with_schema(
repo_root,
&prompt,
&crate::planning::forwarder::planning_schema(),
event_tx, event_tx,
SessionSource::Planner,
Some(config.controller_id()),
)?; )?;
let response = crate::planning::forwarder::parse_planning_response(&raw)?;
match response.kind.as_str() { match response.kind.as_str() {
"question" => { "question" => {

View File

@@ -19,6 +19,7 @@ pub fn run_codex_with_schema(
repo_root: &Path, repo_root: &Path,
prompt: &str, prompt: &str,
schema: &Value, schema: &Value,
model: &str,
event_tx: &Sender<AppEvent>, event_tx: &Sender<AppEvent>,
source: SessionSource, source: SessionSource,
tag: Option<String>, tag: Option<String>,
@@ -34,6 +35,8 @@ pub fn run_codex_with_schema(
.arg("-C") .arg("-C")
.arg(repo_root) .arg(repo_root)
.arg("--full-auto") .arg("--full-auto")
.arg("--model")
.arg(model)
.arg("--color") .arg("--color")
.arg("never") .arg("never")
.arg("--output-schema") .arg("--output-schema")

View File

@@ -5,4 +5,4 @@ mod usage;
pub use self::codex::{generate_controller_id, run_codex_with_schema}; pub use self::codex::{generate_controller_id, run_codex_with_schema};
pub use self::shell::run_shell_commands; pub use self::shell::run_shell_commands;
pub use self::usage::refresh_usage_snapshot; pub use self::usage::{persist_usage_snapshot, refresh_usage_snapshot};

View File

@@ -10,16 +10,29 @@ use crate::model::{ControllerState, UsageSnapshot, UsageWindow};
use crate::repo; use crate::repo;
pub fn refresh_usage_snapshot(state: &ControllerState) -> UsageSnapshot { pub fn refresh_usage_snapshot(state: &ControllerState) -> UsageSnapshot {
fetch_live_usage_snapshot().unwrap_or_else(|_| cached_usage_snapshot(state)) match fetch_live_usage_snapshot() {
Ok(snapshot) => snapshot,
Err(error) => cached_usage_snapshot(state).unwrap_or_else(|| {
UsageSnapshot::unavailable(format!("usage fetch failed: {}", error))
}),
}
} }
fn cached_usage_snapshot(state: &ControllerState) -> UsageSnapshot { pub fn persist_usage_snapshot(state: &mut ControllerState, snapshot: &UsageSnapshot) {
state.last_usage_refresh_at = snapshot.refreshed_at.clone();
state.last_usage_input_tokens = snapshot.input_tokens;
state.last_usage_output_tokens = snapshot.output_tokens;
state.last_usage_primary_window = snapshot.primary.clone();
state.last_usage_secondary_window = snapshot.secondary.clone();
}
fn cached_usage_snapshot(state: &ControllerState) -> Option<UsageSnapshot> {
if state.last_usage_primary_window.is_some() if state.last_usage_primary_window.is_some()
|| state.last_usage_secondary_window.is_some() || state.last_usage_secondary_window.is_some()
|| state.last_usage_input_tokens.is_some() || state.last_usage_input_tokens.is_some()
|| state.last_usage_output_tokens.is_some() || state.last_usage_output_tokens.is_some()
{ {
UsageSnapshot { Some(UsageSnapshot {
input_tokens: state.last_usage_input_tokens, input_tokens: state.last_usage_input_tokens,
output_tokens: state.last_usage_output_tokens, output_tokens: state.last_usage_output_tokens,
primary: state.last_usage_primary_window.clone(), primary: state.last_usage_primary_window.clone(),
@@ -27,9 +40,9 @@ fn cached_usage_snapshot(state: &ControllerState) -> UsageSnapshot {
refreshed_at: Some(repo::now_timestamp()), refreshed_at: Some(repo::now_timestamp()),
available: true, available: true,
note: Some("cached snapshot".to_string()), note: Some("cached snapshot".to_string()),
} })
} else { } else {
UsageSnapshot::unavailable("codex usage unavailable") None
} }
} }
@@ -184,7 +197,7 @@ mod tests {
..ControllerState::default() ..ControllerState::default()
}; };
let snapshot = cached_usage_snapshot(&state); let snapshot = cached_usage_snapshot(&state).expect("cached snapshot");
assert!(snapshot.available); assert!(snapshot.available);
assert_eq!( assert_eq!(
snapshot.primary.as_ref().and_then(|window| window.resets_at), snapshot.primary.as_ref().and_then(|window| window.resets_at),
@@ -202,7 +215,8 @@ mod tests {
#[test] #[test]
fn refresh_usage_snapshot_falls_back_when_usage_missing() { fn refresh_usage_snapshot_falls_back_when_usage_missing() {
let snapshot = cached_usage_snapshot(&ControllerState::default()); let snapshot = cached_usage_snapshot(&ControllerState::default())
.unwrap_or_else(|| UsageSnapshot::unavailable("codex usage unavailable"));
assert!(!snapshot.available); assert!(!snapshot.available);
assert_eq!(snapshot.primary, None); assert_eq!(snapshot.primary, None);
assert_eq!(snapshot.secondary, None); assert_eq!(snapshot.secondary, None);

View File

@@ -58,7 +58,11 @@ pub(crate) fn list_controller_summaries_in(root: &Path) -> Result<Vec<Controller
completed_steps: state.completed_steps.len(), completed_steps: state.completed_steps.len(),
total_steps: plan.steps.len(), total_steps: plan.steps.len(),
last_updated: controller_last_updated(&state), last_updated: controller_last_updated(&state),
branch: config.branch.clone(), branch: if state.allow_branching {
config.branch.clone()
} else {
"current".to_string()
},
}); });
} }

View File

@@ -337,10 +337,38 @@ fn render_create_controller(frame: &mut Frame, app: &App) {
), ),
]), ]),
Line::from(""), Line::from(""),
Line::from(vec![
Span::styled("Model ", Style::default().fg(TEXT_DIM)),
Span::styled(app.create_model(), Style::default().fg(GREEN)),
]),
Line::from(vec![
Span::styled("Fast mode ", Style::default().fg(TEXT_DIM)),
Span::styled(
if app.create_fast_mode { "on" } else { "off" },
Style::default().fg(if app.create_fast_mode { GREEN } else { TEXT }),
),
]),
Line::from(vec![
Span::styled("Branching ", Style::default().fg(TEXT_DIM)),
Span::styled(
if app.create_allow_branching {
"allowed"
} else {
"disabled"
},
Style::default().fg(if app.create_allow_branching { GOLD } else { GREEN }),
),
]),
Line::from(""),
Line::from(Span::styled( Line::from(Span::styled(
"Example: Build the intuitive controller-first TUI picker and workspace.", "Example: Build the intuitive controller-first TUI picker and workspace.",
Style::default().fg(TEXT_DIM), Style::default().fg(TEXT_DIM),
)), )),
Line::from(""),
Line::from(Span::styled(
"Controls: F2 cycle model, F3 toggle fast mode, F4 toggle branching, Enter starts.",
Style::default().fg(TEXT_DIM),
)),
]; ];
if let Some(error) = &app.create_error { if let Some(error) = &app.create_error {
@@ -1202,6 +1230,9 @@ mod tests {
}], }],
picker_selected: 0, picker_selected: 0,
create_input: "Build the picker flow".to_string(), create_input: "Build the picker flow".to_string(),
create_model_index: 0,
create_fast_mode: false,
create_allow_branching: false,
create_error: None, create_error: None,
default_task_path: std::path::PathBuf::from(".agent/controller-loop/task.toon"), default_task_path: std::path::PathBuf::from(".agent/controller-loop/task.toon"),
frame_tick: 0, frame_tick: 0,
@@ -1300,10 +1331,15 @@ mod tests {
#[test] #[test]
fn renders_create_screen() { fn renders_create_screen() {
let app = sample_app(Screen::CreateController); let mut app = sample_app(Screen::CreateController);
app.create_fast_mode = true;
app.create_allow_branching = false;
let rendered = render_to_text(app); let rendered = render_to_text(app);
assert!(rendered.contains("Create Controller")); assert!(rendered.contains("Create Controller"));
assert!(rendered.contains("generated by GPT-5.4 mini")); assert!(rendered.contains("generated by GPT-5.4 mini"));
assert!(rendered.contains("Model gpt-5.4"));
assert!(rendered.contains("Fast mode on"));
assert!(rendered.contains("Branching disabled"));
} }
#[test] #[test]