Compare commits

..

1 Commit

Author SHA1 Message Date
953b61c920 feat: inject crash context into next session's system prompt
When the agent crashes with an unhandled exception, record the error
(type, message, and timestamp) on the SessionEntry.  On the next message,
inject a [System note: ...] into the context prompt so the agent wakes up
knowing what failed and can self-diagnose instead of starting blind.

Three touch points:
- SessionEntry: new fields last_error_type / last_error_message /
  last_error_time (gateway/session.py)
- Exception handler: record crash on session_entry before returning
  error to user (gateway/run.py:~9107)
- Context prompt builder: prepend crash note and clear fields so
  the notice appears exactly once (gateway/run.py:~8216)

The agent sees: [System note: The previous session crashed with
ValueError: Model gpt-5.5 has a context window of 4,096 tokens...]
2026-05-29 15:12:10 +00:00
5 changed files with 49 additions and 271 deletions

View File

@ -580,13 +580,6 @@ class ContextCompressor(ContextEngine):
self.summary_model = summary_model_override or "" self.summary_model = summary_model_override or ""
# Compression-model fallback: set by check_compression_model_feasibility
# when the primary aux compression model fails the minimum context check.
# If set, _generate_summary uses this provider/model for the LLM call
# instead of the main compressor attributes. Dict keys:
# provider, model, base_url, api_key
self._compression_fallback: Optional[Dict[str, str]] = None
# Stores the previous compaction summary for iterative updates # Stores the previous compaction summary for iterative updates
self._previous_summary: Optional[str] = None self._previous_summary: Optional[str] = None
# Anti-thrashing: track whether last compression was effective # Anti-thrashing: track whether last compression was effective
@ -1076,20 +1069,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
} }
if self.summary_model: if self.summary_model:
call_kwargs["model"] = self.summary_model call_kwargs["model"] = self.summary_model
# Compression-model fallback: when the primary aux compression
# model was rejected for insufficient context, the feasibility
# check stored a replacement provider/model here. Override the
# entire main_runtime so call_llm routes the summary request to
# the fallback provider instead of the main one.
if self._compression_fallback:
_fb = self._compression_fallback
call_kwargs["main_runtime"] = {
"model": _fb["model"],
"provider": _fb["provider"],
"base_url": _fb.get("base_url", ""),
"api_key": _fb.get("api_key", ""),
"api_mode": _fb.get("api_mode", self.api_mode),
}
response = call_llm(**call_kwargs) response = call_llm(**call_kwargs)
content = response.choices[0].message.content content = response.choices[0].message.content
# Handle cases where content is not a string (e.g., dict from llama.cpp) # Handle cases where content is not a string (e.g., dict from llama.cpp)

View File

@ -221,101 +221,9 @@ def check_compression_model_feasibility(agent: Any) -> None:
new_threshold, new_threshold,
) )
except ValueError: except ValueError:
# Primary compression model failed the minimum context check # Hard rejections (aux below minimum context) must propagate
# (context_length < MINIMUM_CONTEXT_LENGTH). Before giving up, # so the session refuses to start.
# try the user's fallback provider chain so a model switch or raise
# provider outage doesn't silently disable compression.
_fallback_chain = getattr(agent, '_fallback_chain', None) or []
_tried = [f"{aux_model} ({_aux_cfg_provider or 'auto'}): {aux_context:,} ctx < {MINIMUM_CONTEXT_LENGTH:,}"]
for _fb_entry in _fallback_chain:
_fb_provider = _fb_entry.get("provider", "")
_fb_model = _fb_entry.get("model", "")
if not _fb_provider or not _fb_model:
continue
try:
from agent.auxiliary_client import resolve_provider_client
_fb_client, _fb_resolved_model = resolve_provider_client(
_fb_provider,
_fb_model,
explicit_base_url=_fb_entry.get("base_url", ""),
explicit_api_key=_fb_entry.get("api_key", ""),
main_runtime=agent._current_main_runtime(),
)
if _fb_client is None or not _fb_resolved_model:
_tried.append(f"{_fb_model} ({_fb_provider}): unavailable")
continue
_fb_base_url = str(getattr(_fb_client, "base_url", ""))
_fb_api_key_raw = getattr(_fb_client, "api_key", "")
_fb_api_key = (
""
if callable(_fb_api_key_raw) and not isinstance(_fb_api_key_raw, str)
else str(_fb_api_key_raw or "")
)
_fb_context = get_model_context_length(
_fb_resolved_model,
base_url=_fb_base_url,
api_key=_fb_api_key,
provider=_fb_provider,
custom_providers=getattr(agent, "_custom_providers", None),
)
if _fb_context and _fb_context < MINIMUM_CONTEXT_LENGTH:
_tried.append(
f"{_fb_resolved_model} ({_fb_provider}): "
f"{_fb_context:,} ctx < {MINIMUM_CONTEXT_LENGTH:,}"
)
continue
# ── Found a suitable fallback ──────────────────────────
logger.warning(
"Compression model %s (%s) has only %d token context "
"(minimum %d). Falling back to %s (%s) with %d token context.",
aux_model, _aux_cfg_provider or "auto", aux_context,
MINIMUM_CONTEXT_LENGTH, _fb_resolved_model, _fb_provider,
_fb_context or 0,
)
agent.context_compressor._compression_fallback = {
"provider": _fb_provider,
"model": _fb_resolved_model,
"base_url": _fb_base_url,
"api_key": _fb_api_key,
}
_msg = (
f"⚠ Compression model {aux_model} has only "
f"{aux_context:,} token context (minimum "
f"{MINIMUM_CONTEXT_LENGTH:,} required). "
f"Falling back to {_fb_resolved_model} ({_fb_provider}) "
f"for summaries."
)
agent._compression_warning = _msg
agent._emit_status(_msg)
return
except Exception as _fb_err:
_tried.append(f"{_fb_model} ({_fb_provider}): {_fb_err}")
continue
# No fallback worked — warn and let compression run without
# summaries (same behavior as 'no auxiliary LLM' above).
_all_tried = "; ".join(_tried)
_msg = (
f"⚠ No suitable compression model available. "
f"Tried: {_all_tried}. "
f"Compression will drop middle turns without summaries. "
f"Run `hermes setup` or set "
f"auxiliary.compression.model in config.yaml."
)
agent._compression_warning = _msg
agent._emit_status(_msg)
logger.warning("Compression model fallback exhausted: %s", _all_tried)
return
except Exception as exc: except Exception as exc:
logger.debug( logger.debug(
"Compression feasibility check failed (non-fatal): %s", exc "Compression feasibility check failed (non-fatal): %s", exc

View File

@ -8216,6 +8216,28 @@ class GatewayRunner:
context_note = "[System note: The user's previous session expired due to inactivity. This is a fresh conversation with no prior context.]" context_note = "[System note: The user's previous session expired due to inactivity. This is a fresh conversation with no prior context.]"
context_prompt = context_note + "\n\n" + context_prompt context_prompt = context_note + "\n\n" + context_prompt
# If the previous agent turn crashed, prepend a crash-context notice
# so the agent knows what went wrong and can self-diagnose. The
# error fields are cleared after this read so the notice appears
# exactly once (on the first message of the fresh session).
_last_err_type = getattr(session_entry, 'last_error_type', None)
if _last_err_type:
_last_err_msg = getattr(session_entry, 'last_error_message', '') or ''
_crash_note = (
f"[System note: The previous session crashed with "
f"{_last_err_type}: {_last_err_msg[:300]}. "
f"Diagnose and fix the root cause if possible — "
f"do NOT just retry the same thing.]"
)
context_prompt = _crash_note + "\n\n" + context_prompt
# Clear so the notice appears only once
try:
session_entry.last_error_type = None
session_entry.last_error_message = None
session_entry.last_error_time = None
except Exception:
pass
# Send a user-facing notification explaining the reset, unless: # Send a user-facing notification explaining the reset, unless:
# - notifications are disabled in config # - notifications are disabled in config
# - the platform is excluded (e.g. api_server, webhook) # - the platform is excluded (e.g. api_server, webhook)
@ -9098,6 +9120,14 @@ class GatewayRunner:
logger.exception("Agent error in session %s", session_key) logger.exception("Agent error in session %s", session_key)
error_type = type(e).__name__ error_type = type(e).__name__
error_detail = str(e)[:300] if str(e) else "no details available" error_detail = str(e)[:300] if str(e) else "no details available"
# Record the crash on the session entry so the next session's
# agent can self-diagnose the failure instead of starting blind.
try:
session_entry.last_error_type = error_type
session_entry.last_error_message = error_detail
session_entry.last_error_time = time.time()
except Exception:
pass # defensive — never let error-recording cause a secondary crash
status_hint = "" status_hint = ""
status_code = getattr(e, "status_code", None) status_code = getattr(e, "status_code", None)
_hist_len = len(history) if 'history' in locals() else 0 _hist_len = len(history) if 'history' in locals() else 0

View File

@ -453,6 +453,13 @@ class SessionEntry:
# Last API-reported prompt tokens (for accurate compression pre-check) # Last API-reported prompt tokens (for accurate compression pre-check)
last_prompt_tokens: int = 0 last_prompt_tokens: int = 0
# Set when the previous agent turn crashed with an unhandled exception.
# Consumed once by the message handler to inject a crash-context notice
# into the next session's system prompt so the agent can self-diagnose.
last_error_type: Optional[str] = None
last_error_message: Optional[str] = None
last_error_time: Optional[float] = None
# Set when a session was created because the previous one expired; # Set when a session was created because the previous one expired;
# consumed once by the message handler to inject a notice into context # consumed once by the message handler to inject a notice into context
was_auto_reset: bool = False was_auto_reset: bool = False

View File

@ -57,7 +57,6 @@ def _make_agent(
compressor = MagicMock(spec=ContextCompressor) compressor = MagicMock(spec=ContextCompressor)
compressor.context_length = main_context compressor.context_length = main_context
compressor.threshold_tokens = int(main_context * threshold_percent) compressor.threshold_tokens = int(main_context * threshold_percent)
compressor._compression_fallback = None
agent.context_compressor = compressor agent.context_compressor = compressor
return agent return agent
@ -102,169 +101,24 @@ def test_auto_corrects_threshold_when_aux_context_below_threshold(mock_get_clien
@patch("agent.model_metadata.get_model_context_length", return_value=32_768) @patch("agent.model_metadata.get_model_context_length", return_value=32_768)
@patch("agent.auxiliary_client.get_text_auxiliary_client") @patch("agent.auxiliary_client.get_text_auxiliary_client")
def test_rejects_aux_below_minimum_context(mock_get_client, mock_ctx_len): def test_rejects_aux_below_minimum_context(mock_get_client, mock_ctx_len):
"""When aux context < MINIMUM_CONTEXT_LENGTH (64K) and no fallback """Hard floor: aux context < MINIMUM_CONTEXT_LENGTH (64K) → session
providers are configured, a warning is emitted and compression will refuses to start (ValueError), mirroring the main-model rejection."""
operate without summaries. Previously this raised ValueError; now it
degrades gracefully so a model switch doesn't kill the session."""
agent = _make_agent(main_context=200_000, threshold_percent=0.50) agent = _make_agent(main_context=200_000, threshold_percent=0.50)
mock_client = MagicMock() mock_client = MagicMock()
mock_client.base_url = "https://openrouter.ai/api/v1" mock_client.base_url = "https://openrouter.ai/api/v1"
mock_client.api_key = "sk-aux" mock_client.api_key = "sk-aux"
mock_get_client.return_value = (mock_client, "tiny-aux-model") mock_get_client.return_value = (mock_client, "tiny-aux-model")
messages = [] agent._emit_status = lambda msg: None
agent._emit_status = lambda msg: messages.append(msg)
# No fallback chain → should warn, not raise with pytest.raises(ValueError) as exc_info:
agent._fallback_chain = []
agent._check_compression_model_feasibility()
assert len(messages) == 1
assert "No suitable compression model" in messages[0]
assert "tiny-aux-model" in messages[0]
assert "32,768" in messages[0]
assert "64,000" in messages[0]
assert agent._compression_warning is not None
@patch("agent.model_metadata.get_model_context_length")
@patch("agent.auxiliary_client.get_text_auxiliary_client")
def test_falls_back_to_chain_when_aux_below_minimum(mock_get_client, mock_ctx_len):
"""When the primary aux model fails the context-length floor, the
feasibility check tries each fallback provider in order, using the
first one that meets MINIMUM_CONTEXT_LENGTH."""
agent = _make_agent(main_context=200_000, threshold_percent=0.50)
# Primary aux model: too small (32K)
mock_primary_client = MagicMock()
mock_primary_client.base_url = "https://openrouter.ai/api/v1"
mock_primary_client.api_key = "sk-aux"
mock_get_client.return_value = (mock_primary_client, "tiny-aux-model")
# Fallback chain: two providers, first one meets the floor
agent._fallback_chain = [
{"provider": "opencode_go", "model": "deepseek-v4-pro"},
{"provider": "custom", "model": "gemma-local",
"base_url": "http://127.0.0.1:8081/v1", "api_key": "no-key"},
]
# Mock resolve_provider_client for the fallback resolution
mock_fb_client = MagicMock()
mock_fb_client.base_url = "https://api.opencode.ai/v1"
mock_fb_client.api_key = "sk-fallback"
# get_model_context_length: first return 32K (primary fail),
# then return 128K (fallback success)
mock_ctx_len.side_effect = [32_768, 128_000]
messages = []
agent._emit_status = lambda msg: messages.append(msg)
with patch("agent.auxiliary_client.resolve_provider_client",
return_value=(mock_fb_client, "deepseek-v4-pro")) as mock_resolve:
agent._check_compression_model_feasibility() agent._check_compression_model_feasibility()
# Should have resolved the fallback provider err = str(exc_info.value)
mock_resolve.assert_called_once() assert "tiny-aux-model" in err
# First two positional args: provider, model assert "32,768" in err
assert mock_resolve.call_args[0][0] == "opencode_go" assert "64,000" in err
assert mock_resolve.call_args[0][1] == "deepseek-v4-pro" assert "below the minimum" in err
# Warning should mention the fallback choice
assert len(messages) == 1
assert "Falling back to" in messages[0]
assert "deepseek-v4-pro" in messages[0]
assert "opencode_go" in messages[0]
# Fallback dict stored on compressor
fb = agent.context_compressor._compression_fallback
assert fb is not None
assert fb["provider"] == "opencode_go"
assert fb["model"] == "deepseek-v4-pro"
@patch("agent.model_metadata.get_model_context_length")
@patch("agent.auxiliary_client.get_text_auxiliary_client")
def test_falls_back_past_unavailable_provider(mock_get_client, mock_ctx_len):
"""When the first fallback provider is unavailable, skip it and
try the next one."""
agent = _make_agent(main_context=200_000, threshold_percent=0.50)
mock_primary_client = MagicMock()
mock_primary_client.base_url = "https://openrouter.ai/api/v1"
mock_primary_client.api_key = "sk-aux"
mock_get_client.return_value = (mock_primary_client, "tiny")
# Fallback chain: first unavailable, second works
agent._fallback_chain = [
{"provider": "broken-provider", "model": "broken-model"},
{"provider": "opencode_go", "model": "deepseek-v4-pro"},
]
mock_fb_client = MagicMock()
mock_fb_client.base_url = "https://api.opencode.ai/v1"
mock_fb_client.api_key = "sk-fallback"
# Primary: 32K (fail), broken-provider: unavailable, opencode_go: 128K
mock_ctx_len.side_effect = [32_768, None, 128_000]
messages = []
agent._emit_status = lambda msg: messages.append(msg)
# First resolve returns None (unavailable), second returns client
mock_resolve_values = [(None, None), (mock_fb_client, "deepseek-v4-pro")]
with patch("agent.auxiliary_client.resolve_provider_client",
side_effect=mock_resolve_values) as mock_resolve:
agent._check_compression_model_feasibility()
# Should have tried both fallbacks
assert mock_resolve.call_count == 2
# Should succeed with the second fallback
fb = agent.context_compressor._compression_fallback
assert fb is not None
assert fb["provider"] == "opencode_go"
@patch("agent.model_metadata.get_model_context_length")
@patch("agent.auxiliary_client.get_text_auxiliary_client")
def test_warns_when_all_fallbacks_exhausted(mock_get_client, mock_ctx_len):
"""When every fallback provider also fails the context floor or is
unavailable, emit a warning and degrade to no-summary mode without
raising."""
agent = _make_agent(main_context=200_000, threshold_percent=0.50)
mock_primary_client = MagicMock()
mock_primary_client.base_url = "https://openrouter.ai/api/v1"
mock_primary_client.api_key = "sk-aux"
mock_get_client.return_value = (mock_primary_client, "tiny-main")
agent._fallback_chain = [
{"provider": "small-provider", "model": "small-model"},
]
# Fallback also too small
mock_fb_client = MagicMock()
mock_fb_client.base_url = "https://small.api/v1"
mock_fb_client.api_key = "sk-small"
mock_ctx_len.side_effect = [32_768, 16_384]
messages = []
agent._emit_status = lambda msg: messages.append(msg)
# Mock compressor won't have _compression_fallback until set —
# initialize it so the final assertion works.
agent.context_compressor._compression_fallback = None
with patch("agent.auxiliary_client.resolve_provider_client",
return_value=(mock_fb_client, "small-model")):
agent._check_compression_model_feasibility()
assert len(messages) == 1
assert "No suitable compression model" in messages[0]
assert "small-model" in messages[0]
assert agent._compression_warning is not None
# No fallback on compressor
assert agent.context_compressor._compression_fallback is None
@patch("agent.model_metadata.get_model_context_length", return_value=200_000) @patch("agent.model_metadata.get_model_context_length", return_value=200_000)