feat: inject crash context into next session's system prompt

When the agent crashes with an unhandled exception, record the error
(type, message truncated to 300 characters, and timestamp) on the
SessionEntry.  On the next message, prepend a [System note: ...] to the
context prompt so the agent wakes up knowing what failed and can
self-diagnose instead of starting blind.

Three injection points:
- SessionEntry: new fields last_error_type / last_error_message /
  last_error_time (gateway/session.py)
- Exception handler: record crash on session_entry before returning
  error to user (gateway/run.py:~9107)
- Context prompt builder: prepend crash note and clear fields so
  the notice appears exactly once (gateway/run.py:~8216)

The agent sees, for example: [System note: The previous session crashed
with ValueError: Model gpt-5.5 has a context window of 4,096 tokens...
Diagnose and fix the root cause if possible — do NOT just retry the
same thing.]
This commit is contained in:
Anton Palgunov 2026-05-29 15:12:10 +00:00
parent 2517917de3
commit 953b61c920
2 changed files with 37 additions and 0 deletions

View File

@ -8216,6 +8216,28 @@ class GatewayRunner:
context_note = "[System note: The user's previous session expired due to inactivity. This is a fresh conversation with no prior context.]"
context_prompt = context_note + "\n\n" + context_prompt
# If the previous agent turn crashed, prepend a crash-context notice
# so the agent knows what went wrong and can self-diagnose. The
# error fields are cleared after this read so the notice appears
# exactly once (on the first message of the fresh session).
_last_err_type = getattr(session_entry, 'last_error_type', None)
if _last_err_type:
_last_err_msg = getattr(session_entry, 'last_error_message', '') or ''
_crash_note = (
f"[System note: The previous session crashed with "
f"{_last_err_type}: {_last_err_msg[:300]}. "
f"Diagnose and fix the root cause if possible — "
f"do NOT just retry the same thing.]"
)
context_prompt = _crash_note + "\n\n" + context_prompt
# Clear so the notice appears only once
try:
session_entry.last_error_type = None
session_entry.last_error_message = None
session_entry.last_error_time = None
except Exception:
pass
# Send a user-facing notification explaining the reset, unless:
# - notifications are disabled in config
# - the platform is excluded (e.g. api_server, webhook)
@ -9098,6 +9120,14 @@ class GatewayRunner:
logger.exception("Agent error in session %s", session_key)
error_type = type(e).__name__
error_detail = str(e)[:300] if str(e) else "no details available"
# Record the crash on the session entry so the next session's
# agent can self-diagnose the failure instead of starting blind.
try:
session_entry.last_error_type = error_type
session_entry.last_error_message = error_detail
session_entry.last_error_time = time.time()
except Exception:
pass # defensive — never let error-recording cause a secondary crash
status_hint = ""
status_code = getattr(e, "status_code", None)
_hist_len = len(history) if 'history' in locals() else 0

View File

@ -453,6 +453,13 @@ class SessionEntry:
# Last API-reported prompt tokens (for accurate compression pre-check)
last_prompt_tokens: int = 0
# Set when the previous agent turn crashed with an unhandled exception.
# Consumed once by the message handler to inject a crash-context notice
# into the next session's system prompt so the agent can self-diagnose.
last_error_type: Optional[str] = None
last_error_message: Optional[str] = None
last_error_time: Optional[float] = None
# Set when a session was created because the previous one expired;
# consumed once by the message handler to inject a notice into context
was_auto_reset: bool = False