feat: session improvements — crash-context, compression fallback, semantic RLE, telegram voice/test coverage
This commit is contained in:
parent
2517917de3
commit
f95f2daa4b
@ -943,6 +943,10 @@ def restore_primary_runtime(agent) -> bool:
|
||||
agent._fallback_activated = False
|
||||
agent._fallback_index = 0
|
||||
|
||||
agent._emit_status(
|
||||
f"✅ Primary model recovered — switching back: "
|
||||
f"{agent.model} via {agent.provider}"
|
||||
)
|
||||
logger.info(
|
||||
"Primary runtime restored for new turn: %s (%s)",
|
||||
agent.model, agent.provider,
|
||||
|
||||
@ -846,6 +846,8 @@ def _extract_responses_message_text(item: Any) -> str:
|
||||
|
||||
chunks: List[str] = []
|
||||
for part in content:
|
||||
if part is None:
|
||||
continue
|
||||
ptype = getattr(part, "type", None)
|
||||
if ptype not in {"output_text", "text"}:
|
||||
continue
|
||||
@ -861,6 +863,8 @@ def _extract_responses_reasoning_text(item: Any) -> str:
|
||||
if isinstance(summary, list):
|
||||
chunks: List[str] = []
|
||||
for part in summary:
|
||||
if part is None:
|
||||
continue
|
||||
text = getattr(part, "text", None)
|
||||
if isinstance(text, str) and text:
|
||||
chunks.append(text)
|
||||
@ -920,7 +924,13 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
saw_commentary_phase = False
|
||||
saw_final_answer_phase = False
|
||||
|
||||
for item in output:
|
||||
for idx, item in enumerate(output):
|
||||
if item is None:
|
||||
logger.warning(
|
||||
"Codex response output contains None item (item #%d of %d); skipping.",
|
||||
idx, len(output),
|
||||
)
|
||||
continue
|
||||
item_type = getattr(item, "type", None)
|
||||
item_status = getattr(item, "status", None)
|
||||
if isinstance(item_status, str):
|
||||
|
||||
@ -335,7 +335,15 @@ def run_codex_stream(agent, api_kwargs: dict, client: Any = None, on_first_delta
|
||||
)
|
||||
return agent._run_codex_create_stream_fallback(api_kwargs, client=active_client)
|
||||
raise
|
||||
|
||||
except Exception as exc:
|
||||
# Catch-all for unexpected errors during stream processing
|
||||
# (e.g. TypeError when a response field is None that we try to iterate).
|
||||
# Log with full traceback so we can diagnose, then re-raise.
|
||||
logger.exception(
|
||||
"Codex Responses stream raised unexpected %s: %s. %s",
|
||||
type(exc).__name__, exc, agent._client_log_context(),
|
||||
)
|
||||
raise
|
||||
|
||||
|
||||
def run_codex_create_stream_fallback(agent, api_kwargs: dict, client: Any = None):
|
||||
|
||||
@ -580,6 +580,13 @@ class ContextCompressor(ContextEngine):
|
||||
|
||||
self.summary_model = summary_model_override or ""
|
||||
|
||||
# Compression-model fallback: set by check_compression_model_feasibility
|
||||
# when the primary aux compression model fails the minimum context check.
|
||||
# If set, _generate_summary uses this provider/model for the LLM call
|
||||
# instead of the main compressor attributes. Dict keys:
|
||||
# provider, model, base_url, api_key
|
||||
self._compression_fallback: Optional[Dict[str, str]] = None
|
||||
|
||||
# Stores the previous compaction summary for iterative updates
|
||||
self._previous_summary: Optional[str] = None
|
||||
# Anti-thrashing: track whether last compression was effective
|
||||
@ -1069,6 +1076,20 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
}
|
||||
if self.summary_model:
|
||||
call_kwargs["model"] = self.summary_model
|
||||
# Compression-model fallback: when the primary aux compression
|
||||
# model was rejected for insufficient context, the feasibility
|
||||
# check stored a replacement provider/model here. Override the
|
||||
# entire main_runtime so call_llm routes the summary request to
|
||||
# the fallback provider instead of the main one.
|
||||
if self._compression_fallback:
|
||||
_fb = self._compression_fallback
|
||||
call_kwargs["main_runtime"] = {
|
||||
"model": _fb["model"],
|
||||
"provider": _fb["provider"],
|
||||
"base_url": _fb.get("base_url", ""),
|
||||
"api_key": _fb.get("api_key", ""),
|
||||
"api_mode": _fb.get("api_mode", self.api_mode),
|
||||
}
|
||||
response = call_llm(**call_kwargs)
|
||||
content = response.choices[0].message.content
|
||||
# Handle cases where content is not a string (e.g., dict from llama.cpp)
|
||||
|
||||
@ -221,9 +221,101 @@ def check_compression_model_feasibility(agent: Any) -> None:
|
||||
new_threshold,
|
||||
)
|
||||
except ValueError:
|
||||
# Hard rejections (aux below minimum context) must propagate
|
||||
# so the session refuses to start.
|
||||
raise
|
||||
# Primary compression model failed the minimum context check
|
||||
# (context_length < MINIMUM_CONTEXT_LENGTH). Before giving up,
|
||||
# try the user's fallback provider chain so a model switch or
|
||||
# provider outage doesn't silently disable compression.
|
||||
_fallback_chain = getattr(agent, '_fallback_chain', None) or []
|
||||
_tried = [f"{aux_model} ({_aux_cfg_provider or 'auto'}): {aux_context:,} ctx < {MINIMUM_CONTEXT_LENGTH:,}"]
|
||||
|
||||
for _fb_entry in _fallback_chain:
|
||||
_fb_provider = _fb_entry.get("provider", "")
|
||||
_fb_model = _fb_entry.get("model", "")
|
||||
if not _fb_provider or not _fb_model:
|
||||
continue
|
||||
|
||||
try:
|
||||
from agent.auxiliary_client import resolve_provider_client
|
||||
|
||||
_fb_client, _fb_resolved_model = resolve_provider_client(
|
||||
_fb_provider,
|
||||
_fb_model,
|
||||
explicit_base_url=_fb_entry.get("base_url", ""),
|
||||
explicit_api_key=_fb_entry.get("api_key", ""),
|
||||
main_runtime=agent._current_main_runtime(),
|
||||
)
|
||||
if _fb_client is None or not _fb_resolved_model:
|
||||
_tried.append(f"{_fb_model} ({_fb_provider}): unavailable")
|
||||
continue
|
||||
|
||||
_fb_base_url = str(getattr(_fb_client, "base_url", ""))
|
||||
_fb_api_key_raw = getattr(_fb_client, "api_key", "")
|
||||
_fb_api_key = (
|
||||
""
|
||||
if callable(_fb_api_key_raw) and not isinstance(_fb_api_key_raw, str)
|
||||
else str(_fb_api_key_raw or "")
|
||||
)
|
||||
|
||||
_fb_context = get_model_context_length(
|
||||
_fb_resolved_model,
|
||||
base_url=_fb_base_url,
|
||||
api_key=_fb_api_key,
|
||||
provider=_fb_provider,
|
||||
custom_providers=getattr(agent, "_custom_providers", None),
|
||||
)
|
||||
|
||||
if _fb_context and _fb_context < MINIMUM_CONTEXT_LENGTH:
|
||||
_tried.append(
|
||||
f"{_fb_resolved_model} ({_fb_provider}): "
|
||||
f"{_fb_context:,} ctx < {MINIMUM_CONTEXT_LENGTH:,}"
|
||||
)
|
||||
continue
|
||||
|
||||
# ── Found a suitable fallback ──────────────────────────
|
||||
logger.warning(
|
||||
"Compression model %s (%s) has only %d token context "
|
||||
"(minimum %d). Falling back to %s (%s) with %d token context.",
|
||||
aux_model, _aux_cfg_provider or "auto", aux_context,
|
||||
MINIMUM_CONTEXT_LENGTH, _fb_resolved_model, _fb_provider,
|
||||
_fb_context or 0,
|
||||
)
|
||||
|
||||
agent.context_compressor._compression_fallback = {
|
||||
"provider": _fb_provider,
|
||||
"model": _fb_resolved_model,
|
||||
"base_url": _fb_base_url,
|
||||
"api_key": _fb_api_key,
|
||||
}
|
||||
|
||||
_msg = (
|
||||
f"⚠ Compression model {aux_model} has only "
|
||||
f"{aux_context:,} token context (minimum "
|
||||
f"{MINIMUM_CONTEXT_LENGTH:,} required). "
|
||||
f"Falling back to {_fb_resolved_model} ({_fb_provider}) "
|
||||
f"for summaries."
|
||||
)
|
||||
agent._compression_warning = _msg
|
||||
agent._emit_status(_msg)
|
||||
return
|
||||
|
||||
except Exception as _fb_err:
|
||||
_tried.append(f"{_fb_model} ({_fb_provider}): {_fb_err}")
|
||||
continue
|
||||
|
||||
# No fallback worked — warn and let compression run without
|
||||
# summaries (same behavior as 'no auxiliary LLM' above).
|
||||
_all_tried = "; ".join(_tried)
|
||||
_msg = (
|
||||
f"⚠ No suitable compression model available. "
|
||||
f"Tried: {_all_tried}. "
|
||||
f"Compression will drop middle turns without summaries. "
|
||||
f"Run `hermes setup` or set "
|
||||
f"auxiliary.compression.model in config.yaml."
|
||||
)
|
||||
agent._compression_warning = _msg
|
||||
agent._emit_status(_msg)
|
||||
logger.warning("Compression model fallback exhausted: %s", _all_tried)
|
||||
return
|
||||
except Exception as exc:
|
||||
logger.debug(
|
||||
"Compression feasibility check failed (non-fatal): %s", exc
|
||||
|
||||
@ -1814,6 +1814,14 @@ def run_conversation(
|
||||
break
|
||||
|
||||
except Exception as api_error:
|
||||
# Log full traceback immediately so we can diagnose what broke.
|
||||
# Many recovery paths below can swallow or transform the error;
|
||||
# this guarantees we always have the original stack trace.
|
||||
logger.exception(
|
||||
"%sAPI call raised exception (attempt %s/%s, thread=%s): %s",
|
||||
agent.log_prefix, retry_count + 1, max_retries,
|
||||
agent._thread_identity(), api_error,
|
||||
)
|
||||
# Stop spinner before printing error messages
|
||||
if thinking_spinner:
|
||||
thinking_spinner.stop("(╥_╥) error, retrying...")
|
||||
@ -2822,6 +2830,10 @@ def run_conversation(
|
||||
# provider/network failure (malformed response body,
|
||||
# truncated stream, routing layer corruption), not a
|
||||
# local programming bug, and should be retried (#14782).
|
||||
# Exclude TypeError when it originates from SDK response
|
||||
# processing (e.g. 'NoneType' object is not iterable when
|
||||
# a server sends output: null) — that's a transient API/SDK
|
||||
# bug, not a local programming error. (#14950)
|
||||
is_local_validation_error = (
|
||||
isinstance(api_error, (ValueError, TypeError))
|
||||
and not isinstance(
|
||||
@ -2835,6 +2847,14 @@ def run_conversation(
|
||||
# ssl.SSLError explicitly so the error classifier's
|
||||
# retryable=True mapping takes effect instead.
|
||||
and not isinstance(api_error, ssl.SSLError)
|
||||
# TypeError from the SDK is a transient bug in API
|
||||
# response processing (e.g. NoneType not iterable when
|
||||
# server returns output: null). Do NOT treat it as a
|
||||
# local programming error — it should be retried.
|
||||
and not (
|
||||
isinstance(api_error, TypeError)
|
||||
and not isinstance(api_error, (UnicodeEncodeError, ssl.SSLError))
|
||||
)
|
||||
)
|
||||
# ``FailoverReason.billing`` (HTTP 402) is NOT in this
|
||||
# exclusion set. By the time we reach this block:
|
||||
@ -2887,6 +2907,22 @@ def run_conversation(
|
||||
agent._vprint(f"{agent.log_prefix}❌ Non-retryable client error (HTTP {status_code}). Aborting.", force=True)
|
||||
agent._vprint(f"{agent.log_prefix} 🔌 Provider: {_provider} Model: {_model}", force=True)
|
||||
agent._vprint(f"{agent.log_prefix} 🌐 Endpoint: {_base}", force=True)
|
||||
# Surface local validation bugs (TypeError, ValueError) with a
|
||||
# traceback pointer so the user can find the root cause in logs.
|
||||
if isinstance(api_error, (TypeError, ValueError)) and not isinstance(
|
||||
api_error, (UnicodeEncodeError, json.JSONDecodeError, ssl.SSLError)
|
||||
):
|
||||
_log_dir = os.path.join(os.path.expanduser("~"), ".hermes", "logs")
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix} 🐛 This is a local code error (not a provider failure). "
|
||||
f"Full traceback: {_log_dir}/errors.log",
|
||||
force=True,
|
||||
)
|
||||
# Also emit a status so gateway/cron users see it
|
||||
agent._emit_status(
|
||||
f"🐛 Local bug: {agent._summarize_api_error(api_error)} "
|
||||
f"— check {_log_dir}/errors.log"
|
||||
)
|
||||
# Actionable guidance for common auth errors
|
||||
if classified.is_auth or classified.reason == FailoverReason.billing:
|
||||
if _provider in {"openai-codex", "xai-oauth", "nous"} and status_code == 401:
|
||||
@ -2917,7 +2953,7 @@ def run_conversation(
|
||||
agent._vprint(f"{agent.log_prefix} • Check credits: https://openrouter.ai/settings/credits", force=True)
|
||||
else:
|
||||
agent._vprint(f"{agent.log_prefix} 💡 This type of error won't be fixed by retrying.", force=True)
|
||||
logger.error(f"{agent.log_prefix}Non-retryable client error: {api_error}")
|
||||
logger.error(f"{agent.log_prefix}Non-retryable client error: {api_error}", exc_info=True)
|
||||
# Skip session persistence when the error is likely
|
||||
# context-overflow related (status 400 + large session).
|
||||
# Persisting the failed user message would make the
|
||||
|
||||
@ -213,24 +213,44 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
_normalize_codex_response,
|
||||
)
|
||||
|
||||
import logging
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
# _normalize_codex_response returns (SimpleNamespace, finish_reason_str)
|
||||
msg, finish_reason = _normalize_codex_response(response)
|
||||
try:
|
||||
msg, finish_reason = _normalize_codex_response(response)
|
||||
except Exception:
|
||||
_logger.exception(
|
||||
"Failed to normalize Codex response (response type=%s, has_output=%s)",
|
||||
type(response).__name__,
|
||||
hasattr(response, "output"),
|
||||
)
|
||||
raise
|
||||
|
||||
tool_calls = None
|
||||
if msg and msg.tool_calls:
|
||||
tool_calls = []
|
||||
for tc in msg.tool_calls:
|
||||
provider_data = {}
|
||||
if hasattr(tc, "call_id") and tc.call_id:
|
||||
provider_data["call_id"] = tc.call_id
|
||||
if hasattr(tc, "response_item_id") and tc.response_item_id:
|
||||
provider_data["response_item_id"] = tc.response_item_id
|
||||
tool_calls.append(ToolCall(
|
||||
id=tc.id if hasattr(tc, "id") else (tc.function.name if hasattr(tc, "function") else None),
|
||||
name=tc.function.name if hasattr(tc, "function") else getattr(tc, "name", ""),
|
||||
arguments=tc.function.arguments if hasattr(tc, "function") else getattr(tc, "arguments", "{}"),
|
||||
provider_data=provider_data or None,
|
||||
))
|
||||
_raw_tc = msg.tool_calls
|
||||
if not hasattr(_raw_tc, "__iter__"):
|
||||
# Defensive: tool_calls should be a list; log and skip if not iterable.
|
||||
_logger.warning(
|
||||
"Codex response tool_calls is not iterable (type=%s, value=%r); "
|
||||
"skipping tool call extraction.",
|
||||
type(_raw_tc).__name__, _raw_tc,
|
||||
)
|
||||
else:
|
||||
tool_calls = []
|
||||
for tc in _raw_tc:
|
||||
provider_data = {}
|
||||
if hasattr(tc, "call_id") and tc.call_id:
|
||||
provider_data["call_id"] = tc.call_id
|
||||
if hasattr(tc, "response_item_id") and tc.response_item_id:
|
||||
provider_data["response_item_id"] = tc.response_item_id
|
||||
tool_calls.append(ToolCall(
|
||||
id=tc.id if hasattr(tc, "id") else (tc.function.name if hasattr(tc, "function") else None),
|
||||
name=tc.function.name if hasattr(tc, "function") else getattr(tc, "name", ""),
|
||||
arguments=tc.function.arguments if hasattr(tc, "function") else getattr(tc, "arguments", "{}"),
|
||||
provider_data=provider_data or None,
|
||||
))
|
||||
|
||||
# Extract reasoning items for provider_data
|
||||
provider_data = {}
|
||||
|
||||
156
docs/experiments/semantic-rle-context-engine.md
Normal file
156
docs/experiments/semantic-rle-context-engine.md
Normal file
@ -0,0 +1,156 @@
|
||||
# Semantic RLE Context Engine experiment
|
||||
|
||||
Status: experimental MVP, not enabled by default.
|
||||
|
||||
## Hypothesis
|
||||
|
||||
For very long Telegram chats, a deterministic context engine with:
|
||||
|
||||
- a verbatim hot tail;
|
||||
- a semantic factual ledger for older turns;
|
||||
- explicit stale/superseded facts;
|
||||
- credential references instead of raw secrets;
|
||||
- retrieval notes for manual compression focus;
|
||||
|
||||
will reduce "misses" versus the current lossy compression path, especially when users correct facts over time ("server X" -> "server Y") or return to obligations created many turns ago.
|
||||
|
||||
## MVP design
|
||||
|
||||
Plugin: `plugins/context_engine/semantic_rle/`.
|
||||
|
||||
Compression output shape:
|
||||
|
||||
1. original system messages;
|
||||
2. `system` summary block named `Semantic RLE context ledger`;
|
||||
3. last `hot_tail_messages` non-system messages preserved verbatim.
|
||||
|
||||
The ledger is deterministic and local-only. No cloud LLM is called by the plugin.
|
||||
|
||||
Ledger sections:
|
||||
|
||||
- Active facts
|
||||
- Decisions
|
||||
- Obligations
|
||||
- Superseded facts
|
||||
- Unresolved questions
|
||||
- Credential refs
|
||||
- Retrieval notes
|
||||
|
||||
Best-effort redaction:
|
||||
|
||||
- key/value secrets (`api_key=...`, `token: ...`, `Authorization: ...`) become stable `credential_ref:credential:<hash>` references;
|
||||
- token-like long strings become credential refs;
|
||||
- IPv4-like strings become `[REDACTED_IP]`.
|
||||
|
||||
## What counts as a miss
|
||||
|
||||
A run has a miss when the assistant, after compaction, does any of these:
|
||||
|
||||
1. uses an inactive/superseded fact as current;
|
||||
2. loses a still-active obligation or TODO;
|
||||
3. says it does not know a fact that was in the compacted cold history;
|
||||
4. exposes raw fake credentials or sensitive IP-like strings from cold history;
|
||||
5. answers from an older decision when a newer decision superseded it;
|
||||
6. needs user correction for information that the ledger retained.
|
||||
|
||||
## Baseline
|
||||
|
||||
Baseline should be the same Telegram session corpus and prompt set with:
|
||||
|
||||
```yaml
|
||||
context:
|
||||
engine: compressor
|
||||
```
|
||||
|
||||
Record:
|
||||
|
||||
- number of manual corrections per 100 turns;
|
||||
- number of stale-fact answers per scenario;
|
||||
- number of obligation/TODO misses;
|
||||
- raw secret leakage count in model-visible compressed context;
|
||||
- compressed message count and rough token count;
|
||||
- whether final hot-tail messages remain byte-for-byte unchanged.
|
||||
|
||||
## First A/B plan
|
||||
|
||||
1. Select 5-10 long Telegram transcripts with known corrections and recurring tasks.
|
||||
2. Replay fixed query checkpoints against baseline `compressor` and experimental `semantic_rle`.
|
||||
3. Keep model/provider/toolsets constant.
|
||||
4. For each checkpoint, force compression before asking the evaluation query.
|
||||
5. Score blind if possible: correct/current, stale, missing, unsafe leak, or ambiguous.
|
||||
|
||||
## Test scenarios
|
||||
|
||||
- Hot tail preservation: last N messages must be unchanged.
|
||||
- Server supersession: `server alpha` followed by `server beta` should keep beta active and mark alpha superseded.
|
||||
- Fake token redaction: no raw fake token appears in compressed output; only credential refs.
|
||||
- IP-like redaction: raw IPv4-like strings do not appear in the ledger.
|
||||
- Obligations: old `todo`/`надо` messages survive as obligations.
|
||||
- Unresolved questions: old question markers survive as unresolved questions.
|
||||
- Deterministic failure: if extraction raises, return original messages rather than dropping context.
|
||||
- Discovery/config: plugin can be discovered and explicitly loaded, but is not globally enabled by adding it to the repo.
|
||||
|
||||
## Manual enablement for experiment only
|
||||
|
||||
Do not turn it on globally in commits. For a local experiment:
|
||||
|
||||
```bash
|
||||
hermes config set context.engine semantic_rle
|
||||
# restart the CLI session or gateway process you intentionally want to test
|
||||
```
|
||||
|
||||
Return to default:
|
||||
|
||||
```bash
|
||||
hermes config set context.engine compressor
|
||||
```
|
||||
|
||||
Gateway note: do not restart production gateway just because the plugin exists. Restart only a deliberately selected experiment profile/session.
|
||||
|
||||
## Deterministic smoke-eval
|
||||
|
||||
Run:
|
||||
|
||||
```bash
|
||||
python scripts/semantic_rle_eval.py
|
||||
python scripts/semantic_rle_eval.py --json
|
||||
```
|
||||
|
||||
Current invariant results on the checked-in synthetic corpus:
|
||||
|
||||
- `hot_tail_only_baseline`: 4/12 checks passed.
|
||||
- `semantic_rle`: 12/12 checks passed.
|
||||
|
||||
Covered invariants:
|
||||
|
||||
- current fact retained after supersession;
|
||||
- old fact marked superseded;
|
||||
- old decision retained;
|
||||
- old obligation retained;
|
||||
- old unresolved question retained;
|
||||
- cold fake token/IP redacted to refs/markers;
|
||||
- hot tail preserved byte-for-byte.
|
||||
|
||||
This is only a deterministic preflight. It proves the plugin keeps the facts in model-visible context; it does **not** prove the LLM will use them correctly in live Telegram replay.
|
||||
|
||||
## Logical MVP boundary
|
||||
|
||||
Done for MVP:
|
||||
|
||||
- plugin discovery and explicit loading;
|
||||
- ContextEngine ABC compatibility;
|
||||
- local-only deterministic compression path;
|
||||
- fail-closed compression error handling;
|
||||
- hot tail preservation;
|
||||
- factual ledger with active/superseded facts;
|
||||
- decisions/obligations/questions sections;
|
||||
- cold-history fake secret and IPv4 redaction;
|
||||
- smoke-eval harness and tests.
|
||||
|
||||
Still intentionally out of MVP:
|
||||
|
||||
- Better fact keys: current MVP is regex-based and intentionally conservative.
|
||||
- Persistence: ledger is in-memory only; no per-chat store yet.
|
||||
- Retrieval tools: no `semantic_rle_search` tool yet.
|
||||
- Multilingual extraction: Russian TODO/question markers are minimal.
|
||||
- Live Telegram replay/scoring against real transcripts.
|
||||
@ -30,6 +30,7 @@ try:
|
||||
Application,
|
||||
CommandHandler,
|
||||
CallbackQueryHandler,
|
||||
MessageReactionHandler,
|
||||
MessageHandler as TelegramMessageHandler,
|
||||
ContextTypes,
|
||||
filters,
|
||||
@ -48,6 +49,7 @@ except ImportError:
|
||||
Application = Any
|
||||
CommandHandler = Any
|
||||
CallbackQueryHandler = Any
|
||||
MessageReactionHandler = Any
|
||||
TelegramMessageHandler = Any
|
||||
HTTPXRequest = Any
|
||||
filters = None
|
||||
@ -118,7 +120,7 @@ def check_telegram_requirements() -> bool:
|
||||
"""
|
||||
global TELEGRAM_AVAILABLE, Update, Bot, Message, InlineKeyboardButton
|
||||
global InlineKeyboardMarkup, LinkPreviewOptions, Application
|
||||
global CommandHandler, CallbackQueryHandler, TelegramMessageHandler
|
||||
global CommandHandler, CallbackQueryHandler, MessageReactionHandler, TelegramMessageHandler
|
||||
global ContextTypes, filters, ParseMode, ChatType, HTTPXRequest
|
||||
if TELEGRAM_AVAILABLE:
|
||||
return True
|
||||
@ -137,6 +139,7 @@ def check_telegram_requirements() -> bool:
|
||||
from telegram.ext import (
|
||||
Application as _App, CommandHandler as _CH,
|
||||
CallbackQueryHandler as _CQH,
|
||||
MessageReactionHandler as _MRH,
|
||||
MessageHandler as _MH,
|
||||
ContextTypes as _CT, filters as _filters,
|
||||
)
|
||||
@ -153,6 +156,7 @@ def check_telegram_requirements() -> bool:
|
||||
Application = _App
|
||||
CommandHandler = _CH
|
||||
CallbackQueryHandler = _CQH
|
||||
MessageReactionHandler = _MRH
|
||||
TelegramMessageHandler = _MH
|
||||
ContextTypes = _CT
|
||||
filters = _filters
|
||||
@ -1581,6 +1585,14 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
))
|
||||
# Handle inline keyboard button callbacks (update prompts)
|
||||
self._app.add_handler(CallbackQueryHandler(self._handle_callback_query))
|
||||
|
||||
# Handle user reactions to bot/news messages. The existing
|
||||
# telegram.reactions flag controls the bot's own processing
|
||||
# lifecycle reactions (👀/👍/👎); this handler is for inbound
|
||||
# Telegram ``message_reaction`` updates so reaction-only feedback
|
||||
# can reach the agent/session.
|
||||
if MessageReactionHandler is not Any:
|
||||
self._app.add_handler(MessageReactionHandler(self._handle_message_reaction))
|
||||
|
||||
# Start polling — retry initialize() for transient TLS resets
|
||||
try:
|
||||
@ -2073,6 +2085,46 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
is_connect_timeout = self._looks_like_connect_timeout(e)
|
||||
return SendResult(success=False, error=str(e), retryable=(is_connect_timeout or not is_timeout))
|
||||
|
||||
def _thread_metadata_for_voice_ack(self, event: MessageEvent) -> Optional[Dict[str, Any]]:
|
||||
"""Build thread metadata for a platform-side pre-STT voice ACK.
|
||||
|
||||
The ACK is sent before gateway STT calls run, so it must stay entirely
|
||||
platform-side and must not become an assistant/model turn. Reuse the
|
||||
same Telegram thread/DM-topic metadata shape the runner uses for normal
|
||||
replies so the short status lands in the same chat/topic.
|
||||
"""
|
||||
source = event.source
|
||||
thread_id = getattr(source, "thread_id", None)
|
||||
if thread_id is None:
|
||||
return None
|
||||
metadata: Dict[str, Any] = {"thread_id": thread_id}
|
||||
if getattr(source, "platform", None) == Platform.TELEGRAM and getattr(source, "chat_type", None) == "dm":
|
||||
metadata["telegram_dm_topic_reply_fallback"] = True
|
||||
tid = str(thread_id)
|
||||
if tid and tid not in {"", "1"}:
|
||||
metadata["direct_messages_topic_id"] = tid
|
||||
anchor = getattr(source, "message_id", None) or event.message_id
|
||||
if anchor is not None:
|
||||
metadata["telegram_reply_to_message_id"] = str(anchor)
|
||||
return metadata
|
||||
|
||||
async def _send_voice_pre_stt_ack(self, event: MessageEvent) -> None:
|
||||
"""Send a best-effort Telegram status before slow voice transcription."""
|
||||
try:
|
||||
result = await self.send(
|
||||
event.source.chat_id,
|
||||
"🎧 получил, распознаю",
|
||||
reply_to=event.message_id,
|
||||
metadata=self._thread_metadata_for_voice_ack(event),
|
||||
)
|
||||
if not result.success:
|
||||
logger.warning(
|
||||
"[Telegram] Pre-STT voice ACK was not delivered: %s",
|
||||
result.error or "unknown error",
|
||||
)
|
||||
except Exception:
|
||||
logger.exception("[Telegram] Failed to send pre-STT voice ACK; continuing transcription")
|
||||
|
||||
async def send_or_update_status(
|
||||
self,
|
||||
chat_id: str,
|
||||
@ -4947,6 +4999,113 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
"""
|
||||
return getattr(update, "effective_message", None) or getattr(update, "message", None)
|
||||
|
||||
async def _handle_message_reaction(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
||||
"""Handle Telegram user reactions as lightweight synthetic messages."""
|
||||
reaction_update = getattr(update, "message_reaction", None)
|
||||
if reaction_update is None:
|
||||
return
|
||||
|
||||
user = getattr(reaction_update, "user", None)
|
||||
user_id = str(getattr(user, "id", "") or "")
|
||||
bot_id = str(getattr(getattr(self, "_bot", None), "id", "") or "")
|
||||
if user_id and bot_id and user_id == bot_id:
|
||||
return
|
||||
|
||||
chat = getattr(reaction_update, "chat", None)
|
||||
chat_id = str(getattr(chat, "id", "") or "")
|
||||
if not chat_id or not user_id:
|
||||
return
|
||||
|
||||
chat_type_raw = getattr(chat, "type", None)
|
||||
chat_type_value = str(getattr(chat_type_raw, "value", chat_type_raw) or "dm").lower()
|
||||
chat_type = "dm" if chat_type_value == "private" else (
|
||||
"group" if chat_type_value in {"group", "supergroup"} else chat_type_value
|
||||
)
|
||||
thread_id = getattr(reaction_update, "message_thread_id", None)
|
||||
user_name = getattr(user, "first_name", None) or getattr(user, "username", None)
|
||||
|
||||
if not self._is_callback_user_authorized(
|
||||
user_id,
|
||||
chat_id=chat_id,
|
||||
chat_type=chat_type,
|
||||
thread_id=str(thread_id) if thread_id is not None else None,
|
||||
user_name=user_name,
|
||||
):
|
||||
logger.info("[%s] Ignoring Telegram reaction from unauthorized user %s", self.name, user_id)
|
||||
return
|
||||
|
||||
def _reaction_label(obj: Any) -> str:
|
||||
emoji = getattr(obj, "emoji", None)
|
||||
if emoji:
|
||||
return str(emoji)
|
||||
custom = getattr(obj, "custom_emoji_id", None)
|
||||
if custom:
|
||||
return f"custom_emoji:{custom}"
|
||||
return str(getattr(obj, "type", obj))
|
||||
|
||||
new_reactions = list(getattr(reaction_update, "new_reaction", None) or [])
|
||||
old_reactions = list(getattr(reaction_update, "old_reaction", None) or [])
|
||||
new_labels = [_reaction_label(r) for r in new_reactions]
|
||||
old_labels = [_reaction_label(r) for r in old_reactions]
|
||||
target_message_id = getattr(reaction_update, "message_id", None)
|
||||
|
||||
if not new_labels:
|
||||
logger.info(
|
||||
"[%s] Telegram reaction removed: chat=%s user=%s message=%s previous=%s",
|
||||
self.name,
|
||||
chat_id,
|
||||
user_id,
|
||||
target_message_id,
|
||||
" ".join(old_labels) if old_labels else "<unknown>",
|
||||
)
|
||||
return
|
||||
|
||||
positive_labels = {"👍", "🔥", "👌", "❤️", "❤", "😍", "🥰", "👏", "🎉", "🤩"}
|
||||
negative_labels = {"👎", "💩", "🤮", "😡", "😠", "🙄", "😴", "🤢"}
|
||||
label_set = set(new_labels)
|
||||
if label_set & negative_labels:
|
||||
feedback = "negative feedback / мимо"
|
||||
elif label_set & positive_labels:
|
||||
feedback = "positive feedback / годно"
|
||||
else:
|
||||
feedback = "emoji feedback"
|
||||
|
||||
text = (
|
||||
f"[Telegram reaction feedback: {' '.join(new_labels)} on message {target_message_id}. "
|
||||
f"Interpretation: {feedback}. "
|
||||
f"If the target was a cron/news/digest item, use this as taste feedback.]"
|
||||
)
|
||||
|
||||
from gateway.session import SessionSource
|
||||
|
||||
source = SessionSource(
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_id=chat_id,
|
||||
chat_name=getattr(chat, "title", None),
|
||||
chat_type=chat_type,
|
||||
user_id=user_id,
|
||||
user_name=str(user_name) if user_name else None,
|
||||
thread_id=str(thread_id) if thread_id is not None else None,
|
||||
message_id=str(target_message_id) if target_message_id is not None else None,
|
||||
)
|
||||
event = MessageEvent(
|
||||
text=text,
|
||||
message_type=MessageType.TEXT,
|
||||
source=source,
|
||||
raw_message=reaction_update,
|
||||
message_id=str(target_message_id) if target_message_id is not None else None,
|
||||
platform_update_id=update.update_id,
|
||||
)
|
||||
logger.info(
|
||||
"[%s] Telegram reaction received: chat=%s user=%s message=%s emoji=%s",
|
||||
self.name,
|
||||
chat_id,
|
||||
user_id,
|
||||
target_message_id,
|
||||
" ".join(new_labels) if new_labels else "<removed>",
|
||||
)
|
||||
await self.handle_message(event)
|
||||
|
||||
async def _handle_text_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
||||
"""Handle incoming text messages.
|
||||
|
||||
@ -5257,8 +5416,9 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
event.media_urls = [cached_path]
|
||||
event.media_types = ["audio/ogg"]
|
||||
logger.info("[Telegram] Cached user voice at %s", cached_path)
|
||||
await self._send_voice_pre_stt_ack(event)
|
||||
except Exception as e:
|
||||
logger.warning("[Telegram] Failed to cache voice: %s", e, exc_info=True)
|
||||
logger.warning("[Telegram] Failed to cache voice before STT: %s", e, exc_info=True)
|
||||
elif msg.audio:
|
||||
try:
|
||||
file_obj = await msg.audio.get_file()
|
||||
|
||||
349
gateway/run.py
349
gateway/run.py
@ -1266,6 +1266,23 @@ def _is_control_interrupt_message(message: Optional[str]) -> bool:
|
||||
return normalized in _CONTROL_INTERRUPT_MESSAGES
|
||||
|
||||
|
||||
def _should_process_pending_during_drain(result: Optional[dict]) -> bool:
|
||||
"""Return True when a queued follow-up is safe to run before shutdown.
|
||||
|
||||
If a user sends a follow-up while an active gateway turn is running, the
|
||||
adapter stores it in the pending slot. A service restart can begin while
|
||||
that active turn is still running. When the active turn then finishes
|
||||
cleanly during the drain window, dropping the pending slot loses the user's
|
||||
message. Process it before exit unless the active turn itself ended via an
|
||||
interruption/failure, where continuing work during shutdown would be unsafe.
|
||||
"""
|
||||
if not result:
|
||||
return False
|
||||
if result.get("interrupted") or result.get("failed"):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _skill_slug_from_frontmatter(skill_md: Path) -> tuple[str | None, str | None]:
|
||||
"""Derive the /command slug and declared frontmatter name from a SKILL.md.
|
||||
|
||||
@ -7535,6 +7552,9 @@ class GatewayRunner:
|
||||
if canonical == "insights":
|
||||
return await self._handle_insights_command(event)
|
||||
|
||||
if canonical == "stats":
|
||||
return await self._handle_stats_command(event)
|
||||
|
||||
if canonical == "reload-mcp":
|
||||
return await self._handle_reload_mcp_command(event)
|
||||
|
||||
@ -8216,6 +8236,28 @@ class GatewayRunner:
|
||||
context_note = "[System note: The user's previous session expired due to inactivity. This is a fresh conversation with no prior context.]"
|
||||
context_prompt = context_note + "\n\n" + context_prompt
|
||||
|
||||
# If the previous agent turn crashed, prepend a crash-context notice
|
||||
# so the agent knows what went wrong and can self-diagnose. The
|
||||
# error fields are cleared after this read so the notice appears
|
||||
# exactly once (on the first message of the fresh session).
|
||||
_last_err_type = getattr(session_entry, 'last_error_type', None)
|
||||
if _last_err_type:
|
||||
_last_err_msg = getattr(session_entry, 'last_error_message', '') or ''
|
||||
_crash_note = (
|
||||
f"[System note: The previous session crashed with "
|
||||
f"{_last_err_type}: {_last_err_msg[:300]}. "
|
||||
f"Diagnose and fix the root cause if possible — "
|
||||
f"do NOT just retry the same thing.]"
|
||||
)
|
||||
context_prompt = _crash_note + "\n\n" + context_prompt
|
||||
# Clear so the notice appears only once
|
||||
try:
|
||||
session_entry.last_error_type = None
|
||||
session_entry.last_error_message = None
|
||||
session_entry.last_error_time = None
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Send a user-facing notification explaining the reset, unless:
|
||||
# - notifications are disabled in config
|
||||
# - the platform is excluded (e.g. api_server, webhook)
|
||||
@ -9098,6 +9140,14 @@ class GatewayRunner:
|
||||
logger.exception("Agent error in session %s", session_key)
|
||||
error_type = type(e).__name__
|
||||
error_detail = str(e)[:300] if str(e) else "no details available"
|
||||
# Record the crash on the session entry so the next session's
|
||||
# agent can self-diagnose the failure instead of starting blind.
|
||||
try:
|
||||
session_entry.last_error_type = error_type
|
||||
session_entry.last_error_message = error_detail
|
||||
session_entry.last_error_time = time.time()
|
||||
except Exception:
|
||||
pass # defensive — never let error-recording cause a secondary crash
|
||||
status_hint = ""
|
||||
status_code = getattr(e, "status_code", None)
|
||||
_hist_len = len(history) if 'history' in locals() else 0
|
||||
@ -13234,6 +13284,196 @@ class GatewayRunner:
|
||||
logger.error("Insights command error: %s", e, exc_info=True)
|
||||
return t("gateway.insights.error", error=e)
|
||||
|
||||
async def _handle_stats_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /stats command — comprehensive system and model report.
|
||||
|
||||
Pure local compute — no LLM call, no prompt-cache impact. Surfaces
|
||||
model config, context engine, skills usage, curator state, cron jobs,
|
||||
and recent session activity in a compact dashboard.
|
||||
"""
|
||||
import json, time as _time, os as _os
|
||||
from datetime import datetime, timezone, timedelta
|
||||
|
||||
source = event.source
|
||||
session_entry = self.session_store.get_or_create_session(source)
|
||||
lines: list[str] = []
|
||||
|
||||
# ── Header ──────────────────────────────────────────────────────
|
||||
_now = datetime.now(timezone.utc)
|
||||
lines.append("📊 **Hermes Stats**")
|
||||
lines.append("")
|
||||
|
||||
# ── Model ───────────────────────────────────────────────────────
|
||||
cfg = self._read_user_config()
|
||||
model_cfg = cfg.get("model", {}) if isinstance(cfg, dict) else {}
|
||||
main_model = model_cfg.get("default", "") if isinstance(model_cfg, dict) else str(model_cfg)
|
||||
main_provider = model_cfg.get("provider", "") if isinstance(model_cfg, dict) else ""
|
||||
|
||||
fallback_chain = cfg.get("fallback_providers", []) if isinstance(cfg, dict) else []
|
||||
fb_models = [f"{fb.get('model','?')} ({fb.get('provider','?')})" for fb in fallback_chain[:3]]
|
||||
|
||||
# Context length — from cached agent if available, else config
|
||||
ctx_len = "?"
|
||||
ctx_engine_name = "compressor"
|
||||
try:
|
||||
_cache_lock = getattr(self, "_agent_cache_lock", None)
|
||||
if _cache_lock:
|
||||
with _cache_lock:
|
||||
_cached = self._agent_cache.get(session_entry.session_key)
|
||||
if _cached:
|
||||
_agent, _, _ts = _cached
|
||||
_cc = getattr(_agent, "context_compressor", None)
|
||||
if _cc:
|
||||
ctx_len = f"{getattr(_cc, 'context_length', 0):,}"
|
||||
_ce = getattr(_cc, "name", None) or type(_cc).__name__
|
||||
ctx_engine_name = _ce.lower() if _ce != "ContextCompressor" else "compressor"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
lines.append("**🤖 Model**")
|
||||
lines.append(f" Main: `{main_model}` ({main_provider})")
|
||||
if fb_models:
|
||||
lines.append(f" Fallback: {', '.join(fb_models)}")
|
||||
lines.append(f" Context: {ctx_len} tokens | Engine: {ctx_engine_name}")
|
||||
lines.append("")
|
||||
|
||||
# ── Context Engine (Semantic RLE) ───────────────────────────────
|
||||
_rle_dir = _os.path.expanduser("~/.hermes/hermes-agent/plugins/context_engine/semantic_rle")
|
||||
_rle_plugin_yaml = _os.path.join(_rle_dir, "plugin.yaml") if _os.path.isdir(_rle_dir) else ""
|
||||
rle_version = "?"
|
||||
if _rle_plugin_yaml and _os.path.isfile(_rle_plugin_yaml):
|
||||
try:
|
||||
import yaml as _yaml
|
||||
with open(_rle_plugin_yaml) as f:
|
||||
_rle_cfg = _yaml.safe_load(f) or {}
|
||||
rle_version = _rle_cfg.get("version", "?")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_rle_stats = {}
|
||||
try:
|
||||
_rle_dir_cache = _os.path.join(_rle_dir, "__pycache__")
|
||||
if _os.path.isdir(_rle_dir_cache):
|
||||
_rle_stats["cached"] = True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
lines.append("**🧠 Context Engine**")
|
||||
if _os.path.isdir(_rle_dir):
|
||||
lines.append(f" Semantic RLE: v{rle_version} ✓")
|
||||
else:
|
||||
lines.append(" Semantic RLE: not installed")
|
||||
lines.append("")
|
||||
|
||||
# ── Skills Usage ────────────────────────────────────────────────
|
||||
_skills_usage_path = _os.path.expanduser("~/.hermes/skills/.usage.json")
|
||||
skills_active = 0
|
||||
skills_agent = 0
|
||||
skills_top: list[tuple[str, int]] = []
|
||||
if _os.path.isfile(_skills_usage_path):
|
||||
try:
|
||||
with open(_skills_usage_path) as f:
|
||||
_su = json.load(f)
|
||||
for _sn, _sd in _su.items():
|
||||
if _sd.get("state") == "active":
|
||||
skills_active += 1
|
||||
if _sd.get("created_by") == "agent":
|
||||
skills_agent += 1
|
||||
_by_use = sorted(_su.items(), key=lambda x: x[1].get("use_count", 0), reverse=True)
|
||||
skills_top = [(_sn, _sd.get("use_count", 0)) for _sn, _sd in _by_use[:5]]
|
||||
except Exception:
|
||||
pass
|
||||
lines.append("**📚 Skills**")
|
||||
lines.append(f" Active: {skills_active} | Agent-created: {skills_agent}")
|
||||
if skills_top:
|
||||
_top_str = ", ".join(f"`{n}` ({c})" for n, c in skills_top)
|
||||
lines.append(f" Top: {_top_str}")
|
||||
lines.append("")
|
||||
|
||||
# ── Curator ─────────────────────────────────────────────────────
|
||||
_curator_state_path = _os.path.expanduser("~/.hermes/skills/.curator_state.json")
|
||||
curator_runs = 0
|
||||
curator_last = "never"
|
||||
curator_archived = 0
|
||||
if _os.path.isfile(_curator_state_path):
|
||||
try:
|
||||
with open(_curator_state_path) as f:
|
||||
_cs = json.load(f)
|
||||
curator_runs = _cs.get("runs", 0)
|
||||
_cls = _cs.get("last_run")
|
||||
if _cls:
|
||||
_dt = datetime.fromisoformat(_cls.replace("Z", "+00:00"))
|
||||
_delta = _now - _dt
|
||||
if _delta.days > 0:
|
||||
curator_last = f"{_delta.days}d ago"
|
||||
elif _delta.seconds > 3600:
|
||||
curator_last = f"{_delta.seconds // 3600}h ago"
|
||||
else:
|
||||
curator_last = f"{_delta.seconds // 60}m ago"
|
||||
curator_archived = _cs.get("archived_count", 0)
|
||||
except Exception:
|
||||
pass
|
||||
lines.append("**🌱 Curator**")
|
||||
lines.append(f" Runs: {curator_runs} | Last: {curator_last} | Archived: {curator_archived}")
|
||||
lines.append("")
|
||||
|
||||
# ── Cron ────────────────────────────────────────────────────────
|
||||
try:
|
||||
from cron.jobs import load_jobs
|
||||
_jobs = load_jobs()
|
||||
_active_jobs = [j for j in _jobs if j.get("paused") != True]
|
||||
_job_names = [j.get("name", j.get("job_id", "?")[:8]) for j in _active_jobs[:5]]
|
||||
lines.append("**⏰ Cron**")
|
||||
if _job_names:
|
||||
lines.append(f" Active: {len(_active_jobs)} — {', '.join(_job_names)}")
|
||||
else:
|
||||
lines.append(" Active: 0")
|
||||
lines.append("")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# ── Recent Activity ─────────────────────────────────────────────
|
||||
try:
|
||||
from hermes_state import SessionDB
|
||||
_db = SessionDB()
|
||||
_recent = _db.list_sessions(limit=5, offset=0)
|
||||
_db.close()
|
||||
if _recent:
|
||||
_recent_24h = 0
|
||||
_recent_msgs = 0
|
||||
_cutoff = _now - timedelta(hours=24)
|
||||
for _rs in _recent:
|
||||
_t = _rs.get("updated_at") or _rs.get("created_at")
|
||||
if _t:
|
||||
_dt = datetime.fromisoformat(str(_t).replace("Z", "+00:00")) if isinstance(_t, str) else _t
|
||||
if _dt > _cutoff:
|
||||
_recent_24h += 1
|
||||
_recent_msgs += _rs.get("message_count", 0)
|
||||
lines.append("**⏱️ Activity (24h)**")
|
||||
lines.append(f" Sessions: {_recent_24h} | Messages: {_recent_msgs}")
|
||||
_latest = _recent[0] if _recent else {}
|
||||
_latest_title = _latest.get("title") or _latest.get("session_id", "?")
|
||||
_latest_preview = (_latest.get("preview") or "")[:80]
|
||||
if _latest_preview:
|
||||
lines.append(f" Latest: _{_latest_preview}_")
|
||||
lines.append("")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# ── System ──────────────────────────────────────────────────────
|
||||
try:
|
||||
import psutil
|
||||
_proc = psutil.Process()
|
||||
_mem_mb = _proc.memory_info().rss / 1024 / 1024
|
||||
lines.append("**🔧 System**")
|
||||
lines.append(f" Memory: {_mem_mb:.0f} MB")
|
||||
except ImportError:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
async def _handle_reload_mcp_command(self, event: MessageEvent) -> Optional[str]:
|
||||
"""Handle /reload-mcp — reconnect MCP servers and rebuild the cached agent.
|
||||
|
||||
@ -14585,6 +14825,79 @@ class GatewayRunner:
|
||||
return prefix
|
||||
return user_text
|
||||
|
||||
@staticmethod
|
||||
def _transcription_looks_uncertain(transcript: str) -> bool:
|
||||
"""Heuristic gate for optional OpenAI Whisper fallback.
|
||||
|
||||
Keep this deliberately conservative: the fallback is paid and slower,
|
||||
so we only retry when the primary transcript explicitly looks broken or
|
||||
uncertain, not for every voice note.
|
||||
"""
|
||||
text = (transcript or "").strip()
|
||||
if not text:
|
||||
return True
|
||||
lowered = text.lower()
|
||||
uncertainty_markers = (
|
||||
"inaudible",
|
||||
"unintelligible",
|
||||
"can't understand",
|
||||
"cannot understand",
|
||||
"could not understand",
|
||||
"неразбор",
|
||||
"не слышно",
|
||||
"плохо слышно",
|
||||
"не понял",
|
||||
"непонятно",
|
||||
)
|
||||
if any(marker in lowered for marker in uncertainty_markers):
|
||||
return True
|
||||
if "<EFBFBD>" in text:
|
||||
return True
|
||||
compact = lowered.strip(" .,…!?-—\n\t")
|
||||
return compact in {"", "...", "[music]", "(music)", "тишина"}
|
||||
|
||||
def _should_try_openai_whisper_fallback(self, result: Dict[str, Any]) -> bool:
|
||||
"""Return True only for the configured fast OpenAI model + uncertain text."""
|
||||
if not result.get("success"):
|
||||
return False
|
||||
try:
|
||||
from tools.transcription_tools import _load_stt_config
|
||||
stt_config = _load_stt_config()
|
||||
except Exception:
|
||||
logger.exception("Failed to load STT config while evaluating fallback policy")
|
||||
return False
|
||||
if str(stt_config.get("provider", "")).lower() != "openai":
|
||||
return False
|
||||
openai_cfg = stt_config.get("openai", {}) if isinstance(stt_config.get("openai"), dict) else {}
|
||||
model = str(openai_cfg.get("model", "")).strip()
|
||||
if model != "gpt-4o-mini-transcribe":
|
||||
return False
|
||||
return self._transcription_looks_uncertain(str(result.get("transcript") or ""))
|
||||
|
||||
async def _transcribe_audio_with_fallback(self, transcribe_audio, path: str) -> Dict[str, Any]:
|
||||
"""Transcribe once, then optionally retry uncertain mini-transcribe output with whisper-1."""
|
||||
result = await asyncio.to_thread(transcribe_audio, path)
|
||||
if not self._should_try_openai_whisper_fallback(result):
|
||||
return result
|
||||
|
||||
logger.info("Primary OpenAI STT transcript looks uncertain; retrying %s with whisper-1", os.path.basename(path))
|
||||
try:
|
||||
fallback = await asyncio.to_thread(transcribe_audio, path, model="whisper-1")
|
||||
except Exception:
|
||||
logger.exception("OpenAI Whisper-1 STT fallback failed for %s; keeping primary transcript", path)
|
||||
result["stt_uncertain"] = True
|
||||
return result
|
||||
if fallback.get("success") and str(fallback.get("transcript") or "").strip():
|
||||
fallback["fallback_used"] = "whisper-1"
|
||||
return fallback
|
||||
logger.warning(
|
||||
"OpenAI Whisper-1 STT fallback did not produce a usable transcript for %s: %s",
|
||||
path,
|
||||
fallback.get("error") or "empty transcript",
|
||||
)
|
||||
result["stt_uncertain"] = True
|
||||
return result
|
||||
|
||||
async def _enrich_message_with_transcription(
|
||||
self,
|
||||
user_text: str,
|
||||
@ -14628,13 +14941,20 @@ class GatewayRunner:
|
||||
for path in audio_paths:
|
||||
try:
|
||||
logger.debug("Transcribing user voice: %s", path)
|
||||
result = await asyncio.to_thread(transcribe_audio, path)
|
||||
result = await self._transcribe_audio_with_fallback(transcribe_audio, path)
|
||||
if result["success"]:
|
||||
transcript = result["transcript"]
|
||||
enriched_parts.append(
|
||||
note = (
|
||||
f'[The user sent a voice message~ '
|
||||
f'Here\'s what they said: "{transcript}"]'
|
||||
f'Here\'s what they said: "{transcript}"'
|
||||
)
|
||||
if result.get("stt_uncertain") or self._transcription_looks_uncertain(str(transcript)):
|
||||
note += (
|
||||
" The transcript still looks uncertain; don't guess. "
|
||||
"Ask the user a short clarification in Russian."
|
||||
)
|
||||
note += "]"
|
||||
enriched_parts.append(note)
|
||||
else:
|
||||
error = result.get("error", "unknown error")
|
||||
if (
|
||||
@ -14661,7 +14981,7 @@ class GatewayRunner:
|
||||
f"transcribing it~ ({error})]"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("Transcription error: %s", e)
|
||||
logger.error("Transcription error for %s: %s", path, e, exc_info=True)
|
||||
enriched_parts.append(
|
||||
"[The user sent a voice message but something went wrong "
|
||||
"when I tried to listen to it~ Let them know!]"
|
||||
@ -17667,13 +17987,20 @@ class GatewayRunner:
|
||||
pass
|
||||
|
||||
if self._draining and (pending_event or pending):
|
||||
logger.info(
|
||||
"Discarding pending follow-up for session %s during gateway %s",
|
||||
session_key or "?",
|
||||
self._status_action_label(),
|
||||
)
|
||||
pending_event = None
|
||||
pending = None
|
||||
if _should_process_pending_during_drain(result):
|
||||
logger.info(
|
||||
"Processing pending follow-up for session %s before gateway %s completes",
|
||||
session_key or "?",
|
||||
self._status_action_label(),
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
"Discarding pending follow-up for session %s during gateway %s",
|
||||
session_key or "?",
|
||||
self._status_action_label(),
|
||||
)
|
||||
pending_event = None
|
||||
pending = None
|
||||
|
||||
if pending_event or pending:
|
||||
logger.debug("Processing pending message: '%s...'", pending[:40])
|
||||
|
||||
@ -453,6 +453,13 @@ class SessionEntry:
|
||||
# Last API-reported prompt tokens (for accurate compression pre-check)
|
||||
last_prompt_tokens: int = 0
|
||||
|
||||
# Set when the previous agent turn crashed with an unhandled exception.
|
||||
# Consumed once by the message handler to inject a crash-context notice
|
||||
# into the next session's system prompt so the agent can self-diagnose.
|
||||
last_error_type: Optional[str] = None
|
||||
last_error_message: Optional[str] = None
|
||||
last_error_time: Optional[float] = None
|
||||
|
||||
# Set when a session was created because the previous one expired;
|
||||
# consumed once by the message handler to inject a notice into context
|
||||
was_auto_reset: bool = False
|
||||
|
||||
@ -98,6 +98,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
aliases=("bg", "btw"), args_hint="<prompt>"),
|
||||
CommandDef("agents", "Show active agents and running tasks", "Session",
|
||||
aliases=("tasks",)),
|
||||
CommandDef("stats", "Show comprehensive system stats — model, skills, curator, cron, activity", "Info"),
|
||||
CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
|
||||
aliases=("q",), args_hint="<prompt>"),
|
||||
CommandDef("steer", "Inject a message after the next tool call without interrupting", "Session",
|
||||
|
||||
300
plugins/context_engine/semantic_rle/__init__.py
Normal file
300
plugins/context_engine/semantic_rle/__init__.py
Normal file
@ -0,0 +1,300 @@
|
||||
"""Experimental Semantic RLE context engine.
|
||||
|
||||
MVP goals:
|
||||
- keep the hot tail verbatim;
|
||||
- collapse older chat into a deterministic factual ledger;
|
||||
- mark superseded facts instead of silently forgetting them;
|
||||
- redact likely credentials and IP-like sensitive strings before ledgering.
|
||||
|
||||
This plugin is intentionally deterministic and does not call cloud LLMs.
|
||||
It is not enabled by default; select it explicitly with ``context.engine``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
||||
|
||||
from agent.context_engine import ContextEngine
|
||||
|
||||
Message = Dict[str, Any]
|
||||
|
||||
_TOKEN_PATTERNS: tuple[re.Pattern[str], ...] = (
|
||||
re.compile(r"\b(?:sk|xox[baprs]?|gh[pousr]|hf|AIza|ya29|pat|tok)[-_][A-Za-z0-9_./+=-]{12,}\b"),
|
||||
re.compile(r"\b[A-Za-z0-9_./+=-]{32,}\b"),
|
||||
)
|
||||
_KEY_VALUE_SECRET_RE = re.compile(
|
||||
r"(?i)\b(api[_-]?key|token|secret|password|passwd|authorization|bearer)\b\s*[:=]\s*([^\s,;]+)"
|
||||
)
|
||||
_IPV4_RE = re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")
|
||||
_SERVER_RE = re.compile(r"(?i)\bserver\s*(?:is|=|:|->|to)?\s*([A-Za-z0-9][A-Za-z0-9._-]{1,})")
|
||||
_DECISION_RE = re.compile(r"(?i)\b(decided|decision|решили|решение|choose|chosen|use|используем)\b")
|
||||
_OBLIGATION_RE = re.compile(r"(?i)\b(todo|надо|нужно|must|should|обяз|follow up|сделай|сделать)\b")
|
||||
_QUESTION_RE = re.compile(r"(?i)(\?\s*$|\b(unresolved|open question|вопрос|непонятно|уточнить)\b)")
|
||||
_FACT_RE = re.compile(r"(?i)\b(is|are|=|:|это|будет|uses|runs|host|server|model|provider)\b")
|
||||
|
||||
|
||||
@dataclass
|
||||
class LedgerFact:
|
||||
key: str
|
||||
value: str
|
||||
role: str
|
||||
turn_index: int
|
||||
active: bool = True
|
||||
superseded_by: Optional[str] = None
|
||||
|
||||
def line(self) -> str:
|
||||
state = "active" if self.active else f"superseded by {self.superseded_by or 'newer fact'}"
|
||||
return f"- [{state}] {self.key}: {self.value} (turn {self.turn_index}, {self.role})"
|
||||
|
||||
|
||||
class SemanticRLEEngine(ContextEngine):
|
||||
"""Deterministic context engine for a hot-tail + semantic-ledger experiment."""
|
||||
|
||||
threshold_percent = 0.75
|
||||
protect_first_n = 1
|
||||
protect_last_n = 8
|
||||
|
||||
def __init__(self, context_length: int = 200_000, hot_tail_messages: int = 8) -> None:
|
||||
self.context_length = context_length
|
||||
self.threshold_tokens = int(context_length * self.threshold_percent)
|
||||
self.hot_tail_messages = hot_tail_messages
|
||||
self.last_prompt_tokens = 0
|
||||
self.last_completion_tokens = 0
|
||||
self.last_total_tokens = 0
|
||||
self.compression_count = 0
|
||||
self._last_ledger: dict[str, Any] = {}
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "semantic_rle"
|
||||
|
||||
def is_available(self) -> bool:
|
||||
return True
|
||||
|
||||
def update_from_response(self, usage: Dict[str, Any]) -> None:
|
||||
self.last_prompt_tokens = int(usage.get("prompt_tokens") or usage.get("input_tokens") or 0)
|
||||
self.last_completion_tokens = int(usage.get("completion_tokens") or usage.get("output_tokens") or 0)
|
||||
self.last_total_tokens = int(usage.get("total_tokens") or (self.last_prompt_tokens + self.last_completion_tokens))
|
||||
|
||||
def should_compress(self, prompt_tokens: Optional[int] = None) -> bool:
|
||||
tokens = self.last_prompt_tokens if prompt_tokens is None else int(prompt_tokens)
|
||||
return bool(self.threshold_tokens and tokens >= self.threshold_tokens)
|
||||
|
||||
def should_compress_preflight(self, messages: List[Message]) -> bool:
|
||||
return self.has_content_to_compress(messages)
|
||||
|
||||
def has_content_to_compress(self, messages: List[Message]) -> bool:
|
||||
return len(self._non_system(messages)) > self.hot_tail_messages
|
||||
|
||||
def update_model(
|
||||
self,
|
||||
model: str,
|
||||
context_length: int,
|
||||
base_url: str = "",
|
||||
api_key: str = "",
|
||||
provider: str = "",
|
||||
api_mode: str = "",
|
||||
) -> None:
|
||||
self.context_length = int(context_length or self.context_length or 0)
|
||||
self.threshold_tokens = int(self.context_length * self.threshold_percent) if self.context_length else 0
|
||||
|
||||
def compress(
|
||||
self,
|
||||
messages: List[Message],
|
||||
current_tokens: Optional[int] = None,
|
||||
focus_topic: Optional[str] = None,
|
||||
) -> List[Message]:
|
||||
"""Return original head + semantic ledger for cold turns + verbatim hot tail.
|
||||
|
||||
The deterministic path is fail-closed: on unexpected errors, return a
|
||||
shallow copy of the original message list rather than dropping context.
|
||||
"""
|
||||
self.compression_count += 1
|
||||
try:
|
||||
if not messages:
|
||||
return []
|
||||
|
||||
copied = [dict(m) for m in messages]
|
||||
system_head = [m for m in copied if m.get("role") == "system"]
|
||||
non_system = [m for m in copied if m.get("role") != "system"]
|
||||
if len(non_system) <= self.hot_tail_messages:
|
||||
return copied
|
||||
|
||||
hot_tail = non_system[-self.hot_tail_messages :]
|
||||
cold = non_system[: -self.hot_tail_messages]
|
||||
ledger = self._build_ledger(cold, focus_topic=focus_topic)
|
||||
self._last_ledger = ledger
|
||||
|
||||
summary_message: Message = {
|
||||
"role": "system",
|
||||
"content": self._render_summary(ledger, focus_topic=focus_topic),
|
||||
}
|
||||
return [*system_head, summary_message, *hot_tail]
|
||||
except Exception:
|
||||
return [dict(m) for m in messages]
|
||||
|
||||
def get_status(self) -> Dict[str, Any]:
|
||||
status = super().get_status()
|
||||
status.update(
|
||||
{
|
||||
"engine": self.name,
|
||||
"hot_tail_messages": self.hot_tail_messages,
|
||||
"ledger_counts": {
|
||||
key: len(value) for key, value in self._last_ledger.items() if isinstance(value, list)
|
||||
},
|
||||
}
|
||||
)
|
||||
return status
|
||||
|
||||
@staticmethod
|
||||
def _non_system(messages: Iterable[Message]) -> list[Message]:
|
||||
return [m for m in messages if m.get("role") != "system"]
|
||||
|
||||
def _build_ledger(self, messages: List[Message], focus_topic: Optional[str] = None) -> dict[str, Any]:
|
||||
facts_by_key: dict[str, LedgerFact] = {}
|
||||
superseded: list[LedgerFact] = []
|
||||
decisions: list[str] = []
|
||||
obligations: list[str] = []
|
||||
questions: list[str] = []
|
||||
credential_refs: list[str] = []
|
||||
retrieval_notes: list[str] = []
|
||||
|
||||
for index, msg in enumerate(messages, start=1):
|
||||
role = str(msg.get("role", "unknown"))
|
||||
text = self._string_content(msg.get("content", ""))
|
||||
if not text.strip():
|
||||
continue
|
||||
sanitized, refs = self._sanitize(text)
|
||||
credential_refs.extend(refs)
|
||||
snippet = self._snippet(sanitized)
|
||||
|
||||
fact = self._extract_fact(sanitized, role=role, turn_index=index)
|
||||
if fact:
|
||||
old = facts_by_key.get(fact.key)
|
||||
if old and old.value != fact.value:
|
||||
old.active = False
|
||||
old.superseded_by = fact.value
|
||||
superseded.append(old)
|
||||
facts_by_key[fact.key] = fact
|
||||
|
||||
if _DECISION_RE.search(sanitized):
|
||||
decisions.append(f"- {snippet} (turn {index}, {role})")
|
||||
if _OBLIGATION_RE.search(sanitized):
|
||||
obligations.append(f"- {snippet} (turn {index}, {role})")
|
||||
if _QUESTION_RE.search(sanitized):
|
||||
questions.append(f"- {snippet} (turn {index}, {role})")
|
||||
if focus_topic and focus_topic.lower() in sanitized.lower():
|
||||
retrieval_notes.append(f"- Focus match `{focus_topic}` at turn {index}: {snippet}")
|
||||
|
||||
active_facts = [fact for fact in facts_by_key.values() if fact.active]
|
||||
return {
|
||||
"active_facts": active_facts,
|
||||
"decisions": self._dedupe(decisions),
|
||||
"obligations": self._dedupe(obligations),
|
||||
"superseded_facts": superseded,
|
||||
"unresolved_questions": self._dedupe(questions),
|
||||
"credential_refs": self._dedupe(credential_refs),
|
||||
"retrieval_notes": self._dedupe(retrieval_notes),
|
||||
"cold_turns_compacted": len(messages),
|
||||
}
|
||||
|
||||
def _extract_fact(self, text: str, role: str, turn_index: int) -> Optional[LedgerFact]:
|
||||
server = _SERVER_RE.search(text)
|
||||
if server:
|
||||
return LedgerFact("server", server.group(1), role, turn_index)
|
||||
if not _FACT_RE.search(text):
|
||||
return None
|
||||
cleaned = self._snippet(text, limit=180)
|
||||
key = self._fact_key(cleaned)
|
||||
return LedgerFact(key, cleaned, role, turn_index)
|
||||
|
||||
@staticmethod
|
||||
def _fact_key(text: str) -> str:
|
||||
lower = text.lower()
|
||||
before_sep = re.split(r"\s*(?:is|are|=|:|это|будет|uses|runs)\s*", lower, maxsplit=1)[0]
|
||||
words = re.findall(r"[a-zа-я0-9_-]+", before_sep)[:6]
|
||||
return " ".join(words) or "fact"
|
||||
|
||||
@staticmethod
|
||||
def _string_content(content: Any) -> str:
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
if isinstance(content, list):
|
||||
parts: list[str] = []
|
||||
for item in content:
|
||||
if isinstance(item, dict):
|
||||
if isinstance(item.get("text"), str):
|
||||
parts.append(item["text"])
|
||||
elif isinstance(item.get("content"), str):
|
||||
parts.append(item["content"])
|
||||
elif isinstance(item, str):
|
||||
parts.append(item)
|
||||
return "\n".join(parts)
|
||||
return str(content)
|
||||
|
||||
def _sanitize(self, text: str) -> Tuple[str, list[str]]:
|
||||
refs: list[str] = []
|
||||
|
||||
def ref_for(raw: str, kind: str = "credential") -> str:
|
||||
digest = hashlib.sha256(raw.encode("utf-8", "ignore")).hexdigest()[:10]
|
||||
ref = f"credential_ref:{kind}:{digest}"
|
||||
refs.append(ref)
|
||||
return ref
|
||||
|
||||
def replace_key_value(match: re.Match[str]) -> str:
|
||||
key = match.group(1)
|
||||
raw = match.group(2)
|
||||
return f"{key}=<{ref_for(raw)}>"
|
||||
|
||||
sanitized = _KEY_VALUE_SECRET_RE.sub(replace_key_value, text)
|
||||
for pattern in _TOKEN_PATTERNS:
|
||||
sanitized = pattern.sub(lambda m: f"<{ref_for(m.group(0))}>", sanitized)
|
||||
sanitized = _IPV4_RE.sub("[REDACTED_IP]", sanitized)
|
||||
return sanitized, refs
|
||||
|
||||
@staticmethod
|
||||
def _snippet(text: str, limit: int = 220) -> str:
|
||||
compact = " ".join(text.split())
|
||||
if len(compact) <= limit:
|
||||
return compact
|
||||
return compact[: limit - 1].rstrip() + "…"
|
||||
|
||||
@staticmethod
|
||||
def _dedupe(items: Iterable[str]) -> list[str]:
|
||||
seen: set[str] = set()
|
||||
result: list[str] = []
|
||||
for item in items:
|
||||
if item not in seen:
|
||||
seen.add(item)
|
||||
result.append(item)
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def _render_summary(ledger: dict[str, Any], focus_topic: Optional[str] = None) -> str:
|
||||
sections: list[str] = [
|
||||
"Semantic RLE context ledger (deterministic, older turns compacted).",
|
||||
f"Cold turns compacted: {ledger.get('cold_turns_compacted', 0)}.",
|
||||
"Hot tail messages after this block are preserved verbatim.",
|
||||
]
|
||||
if focus_topic:
|
||||
sections.append(f"Compression focus: {focus_topic}")
|
||||
|
||||
def add_section(title: str, lines: list[str]) -> None:
|
||||
sections.append(f"\n## {title}")
|
||||
sections.extend(lines or ["- None detected."])
|
||||
|
||||
add_section("Active facts", [f.line() for f in ledger.get("active_facts", [])])
|
||||
add_section("Decisions", ledger.get("decisions", []))
|
||||
add_section("Obligations", ledger.get("obligations", []))
|
||||
add_section("Superseded facts", [f.line() for f in ledger.get("superseded_facts", [])])
|
||||
add_section("Unresolved questions", ledger.get("unresolved_questions", []))
|
||||
add_section("Credential refs", [f"- {ref}" for ref in ledger.get("credential_refs", [])])
|
||||
add_section("Retrieval notes", ledger.get("retrieval_notes", []))
|
||||
return "\n".join(sections)
|
||||
|
||||
|
||||
def register(ctx: Any) -> None:
|
||||
ctx.register_context_engine(SemanticRLEEngine())
|
||||
4
plugins/context_engine/semantic_rle/plugin.yaml
Normal file
4
plugins/context_engine/semantic_rle/plugin.yaml
Normal file
@ -0,0 +1,4 @@
|
||||
name: semantic_rle
|
||||
version: 0.1.0
|
||||
experimental: true
|
||||
description: "Experimental deterministic Semantic RLE context engine: hot tail + factual ledger + supersession notes for long chats."
|
||||
10
run_agent.py
10
run_agent.py
@ -1526,7 +1526,15 @@ class AIAgent:
|
||||
|
||||
# Fallback: truncate the raw string but give more room than 200 chars
|
||||
status_code = getattr(error, "status_code", None)
|
||||
prefix = f"HTTP {status_code}: " if status_code else ""
|
||||
prefix_parts = []
|
||||
if status_code:
|
||||
prefix_parts.append(f"HTTP {status_code}")
|
||||
# For local bugs (TypeError, ValueError without status_code), include
|
||||
# the exception type so logs clearly distinguish "provider rejected us"
|
||||
# from "we have a code bug".
|
||||
if not status_code and isinstance(error, (TypeError, ValueError)):
|
||||
prefix_parts.append(type(error).__name__)
|
||||
prefix = f"{': '.join(prefix_parts)}: " if prefix_parts else ""
|
||||
return f"{prefix}{raw[:500]}"
|
||||
|
||||
def _mask_api_key_for_logs(self, key: Any) -> Optional[str]:
|
||||
|
||||
171
scripts/semantic_rle_eval.py
Normal file
171
scripts/semantic_rle_eval.py
Normal file
@ -0,0 +1,171 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Deterministic smoke-eval for the experimental semantic_rle context engine.
|
||||
|
||||
This is not an LLM quality benchmark. It checks the minimum invariant we need
|
||||
before trying live Telegram replay: after cold-history compaction, the
|
||||
model-visible context still contains current facts/obligations, marks stale
|
||||
facts as superseded, preserves the hot tail byte-for-byte, and does not leak
|
||||
raw fake secrets from cold turns.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(REPO_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(REPO_ROOT))
|
||||
|
||||
from plugins.context_engine.semantic_rle import SemanticRLEEngine # noqa: E402
|
||||
|
||||
Message = dict[str, Any]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Scenario:
|
||||
name: str
|
||||
messages: list[Message]
|
||||
hot_tail_messages: int
|
||||
checks: dict[str, Callable[[list[Message]], bool]]
|
||||
|
||||
|
||||
def _render(messages: list[Message]) -> str:
|
||||
return "\n".join(str(message.get("content", "")) for message in messages)
|
||||
|
||||
|
||||
def _hot_tail_only(messages: list[Message], hot_tail_messages: int) -> list[Message]:
|
||||
system_head = [dict(m) for m in messages if m.get("role") == "system"]
|
||||
non_system = [dict(m) for m in messages if m.get("role") != "system"]
|
||||
return [*system_head, *non_system[-hot_tail_messages:]]
|
||||
|
||||
|
||||
def _semantic_rle(messages: list[Message], hot_tail_messages: int) -> list[Message]:
|
||||
return SemanticRLEEngine(hot_tail_messages=hot_tail_messages).compress(messages)
|
||||
|
||||
|
||||
def _scenarios() -> list[Scenario]:
|
||||
fake_token = "sk-test1234567890abcdef1234567890"
|
||||
fake_pat = "ghp_abcdef1234567890abcdef1234567890"
|
||||
ip = "203.0.113.42"
|
||||
|
||||
supersession_messages = [
|
||||
{"role": "system", "content": "You are Hermes."},
|
||||
{"role": "user", "content": "server alpha.example is the current deployment target"},
|
||||
{"role": "assistant", "content": "Noted: server alpha.example."},
|
||||
{"role": "user", "content": "decision: use postgres for the ledger"},
|
||||
{"role": "assistant", "content": "I will use postgres."},
|
||||
{"role": "user", "content": "todo: compare misses against baseline"},
|
||||
{"role": "assistant", "content": "Added comparison todo."},
|
||||
{"role": "user", "content": "unresolved question: how often to compact?"},
|
||||
{"role": "assistant", "content": "We can measure compaction cadence."},
|
||||
{"role": "user", "content": "server beta.example is the current deployment target now"},
|
||||
{"role": "assistant", "content": "Switched to beta.example."},
|
||||
{"role": "user", "content": "hot tail user message"},
|
||||
{"role": "assistant", "content": "hot tail assistant message"},
|
||||
]
|
||||
|
||||
secret_messages = [
|
||||
{"role": "system", "content": "You are Hermes."},
|
||||
{"role": "user", "content": f"api_key={fake_token} server {ip}"},
|
||||
{"role": "assistant", "content": f"token: {fake_pat}"},
|
||||
{"role": "user", "content": "todo: rotate the credential reference"},
|
||||
{"role": "assistant", "content": "Will track credential refs only."},
|
||||
{"role": "user", "content": "hot tail one"},
|
||||
{"role": "assistant", "content": "hot tail two"},
|
||||
]
|
||||
|
||||
return [
|
||||
Scenario(
|
||||
name="supersession_and_obligation",
|
||||
messages=supersession_messages,
|
||||
hot_tail_messages=2,
|
||||
checks={
|
||||
"current_fact_retained": lambda m: "server: beta.example" in _render(m),
|
||||
"old_fact_marked_superseded": lambda m: "[superseded by beta.example] server: alpha.example" in _render(m),
|
||||
"decision_retained": lambda m: "use postgres" in _render(m),
|
||||
"obligation_retained": lambda m: "compare misses against baseline" in _render(m),
|
||||
"question_retained": lambda m: "how often to compact" in _render(m),
|
||||
"hot_tail_preserved": lambda m: m[-2:] == supersession_messages[-2:],
|
||||
},
|
||||
),
|
||||
Scenario(
|
||||
name="cold_secret_redaction",
|
||||
messages=secret_messages,
|
||||
hot_tail_messages=2,
|
||||
checks={
|
||||
"raw_fake_token_absent": lambda m: fake_token not in _render(m) and fake_pat not in _render(m),
|
||||
"raw_ip_absent": lambda m: ip not in _render(m),
|
||||
"credential_ref_present": lambda m: "credential_ref:credential:" in _render(m),
|
||||
"ip_redacted_marker_present": lambda m: "[REDACTED_IP]" in _render(m),
|
||||
"obligation_retained": lambda m: "rotate the credential reference" in _render(m),
|
||||
"hot_tail_preserved": lambda m: m[-2:] == secret_messages[-2:],
|
||||
},
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
def run_eval() -> dict[str, Any]:
|
||||
engines: dict[str, Callable[[list[Message], int], list[Message]]] = {
|
||||
"hot_tail_only_baseline": _hot_tail_only,
|
||||
"semantic_rle": _semantic_rle,
|
||||
}
|
||||
results: list[dict[str, Any]] = []
|
||||
|
||||
for scenario in _scenarios():
|
||||
for engine_name, engine_fn in engines.items():
|
||||
compacted = engine_fn(scenario.messages, scenario.hot_tail_messages)
|
||||
checks = {name: bool(check(compacted)) for name, check in scenario.checks.items()}
|
||||
passed = sum(1 for ok in checks.values() if ok)
|
||||
results.append(
|
||||
{
|
||||
"scenario": scenario.name,
|
||||
"engine": engine_name,
|
||||
"passed": passed,
|
||||
"total": len(checks),
|
||||
"misses": [name for name, ok in checks.items() if not ok],
|
||||
"message_count_before": len(scenario.messages),
|
||||
"message_count_after": len(compacted),
|
||||
"char_count_after": len(_render(compacted)),
|
||||
}
|
||||
)
|
||||
|
||||
by_engine: dict[str, dict[str, int]] = {}
|
||||
for row in results:
|
||||
aggregate = by_engine.setdefault(row["engine"], {"passed": 0, "total": 0})
|
||||
aggregate["passed"] += int(row["passed"])
|
||||
aggregate["total"] += int(row["total"])
|
||||
|
||||
return {"results": results, "summary": by_engine}
|
||||
|
||||
|
||||
def main() -> int:
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("--json", action="store_true", help="print machine-readable JSON")
|
||||
args = parser.parse_args()
|
||||
|
||||
report = run_eval()
|
||||
if args.json:
|
||||
print(json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True))
|
||||
else:
|
||||
print("Semantic RLE deterministic smoke-eval")
|
||||
for engine, row in report["summary"].items():
|
||||
print(f"- {engine}: {row['passed']}/{row['total']} invariant checks passed")
|
||||
print("Details:")
|
||||
for row in report["results"]:
|
||||
misses = ", ".join(row["misses"]) or "none"
|
||||
print(
|
||||
f"- {row['scenario']} / {row['engine']}: "
|
||||
f"{row['passed']}/{row['total']}, misses={misses}, "
|
||||
f"messages {row['message_count_before']}→{row['message_count_after']}, "
|
||||
f"chars_after={row['char_count_after']}"
|
||||
)
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
32
tests/gateway/test_shutdown_pending_followup.py
Normal file
32
tests/gateway/test_shutdown_pending_followup.py
Normal file
@ -0,0 +1,32 @@
|
||||
"""Regression tests for queued follow-ups during gateway shutdown.
|
||||
|
||||
A user message can arrive while the gateway is still draining an active agent
|
||||
turn (for example during a service restart after a code update). The active
|
||||
turn may finish cleanly while the gateway is in draining mode. In that case
|
||||
we must process the already-queued follow-up before exiting instead of dropping
|
||||
it silently.
|
||||
"""
|
||||
|
||||
from gateway.run import _should_process_pending_during_drain
|
||||
|
||||
|
||||
def test_clean_result_processes_pending_followup_during_drain():
|
||||
result = {"final_response": "done", "messages": [], "api_calls": 1}
|
||||
|
||||
assert _should_process_pending_during_drain(result) is True
|
||||
|
||||
|
||||
def test_interrupted_result_drops_pending_followup_during_drain():
|
||||
result = {"interrupted": True, "final_response": ""}
|
||||
|
||||
assert _should_process_pending_during_drain(result) is False
|
||||
|
||||
|
||||
def test_failed_result_drops_pending_followup_during_drain():
|
||||
result = {"failed": True, "final_response": "boom"}
|
||||
|
||||
assert _should_process_pending_during_drain(result) is False
|
||||
|
||||
|
||||
def test_missing_result_drops_pending_followup_during_drain():
|
||||
assert _should_process_pending_during_drain(None) is False
|
||||
@ -79,6 +79,77 @@ async def test_voice_message_still_transcribed():
|
||||
assert "voice message" in result.lower()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_voice_stt_fallback_only_for_uncertain_openai_mini(monkeypatch):
|
||||
"""OpenAI mini-transcribe should retry whisper-1 only when the transcript looks uncertain."""
|
||||
runner = _make_runner(stt_enabled=True)
|
||||
source = SessionSource(platform=Platform.TELEGRAM, chat_id="1", chat_type="dm")
|
||||
event = _voice_event("/tmp/voice.ogg")
|
||||
|
||||
monkeypatch.setattr(
|
||||
"tools.transcription_tools._load_stt_config",
|
||||
lambda: {"provider": "openai", "openai": {"model": "gpt-4o-mini-transcribe"}},
|
||||
)
|
||||
calls = []
|
||||
|
||||
def fake_transcribe(path, model=None):
|
||||
calls.append((path, model))
|
||||
if model == "whisper-1":
|
||||
return {"success": True, "transcript": "нормальный текст", "provider": "openai"}
|
||||
return {"success": True, "transcript": "[inaudible]", "provider": "openai"}
|
||||
|
||||
with patch("tools.transcription_tools.transcribe_audio", side_effect=fake_transcribe):
|
||||
result = await runner._prepare_inbound_message_text(event=event, source=source, history=[])
|
||||
|
||||
assert calls == [("/tmp/voice.ogg", None), ("/tmp/voice.ogg", "whisper-1")]
|
||||
assert "нормальный текст" in result
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_voice_stt_no_fallback_for_confident_openai_mini(monkeypatch):
|
||||
"""Confident primary transcripts should not pay for a Whisper fallback."""
|
||||
runner = _make_runner(stt_enabled=True)
|
||||
source = SessionSource(platform=Platform.TELEGRAM, chat_id="1", chat_type="dm")
|
||||
event = _voice_event("/tmp/voice.ogg")
|
||||
|
||||
monkeypatch.setattr(
|
||||
"tools.transcription_tools._load_stt_config",
|
||||
lambda: {"provider": "openai", "openai": {"model": "gpt-4o-mini-transcribe"}},
|
||||
)
|
||||
with patch(
|
||||
"tools.transcription_tools.transcribe_audio",
|
||||
return_value={"success": True, "transcript": "привет, проверь статус", "provider": "openai"},
|
||||
) as mock_transcribe:
|
||||
result = await runner._prepare_inbound_message_text(event=event, source=source, history=[])
|
||||
|
||||
mock_transcribe.assert_called_once_with("/tmp/voice.ogg")
|
||||
assert "привет, проверь статус" in result
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_voice_stt_uncertain_after_failed_fallback_requests_short_clarification(monkeypatch):
|
||||
"""If fallback cannot rescue an uncertain transcript, the agent should ask briefly instead of guessing."""
|
||||
runner = _make_runner(stt_enabled=True)
|
||||
source = SessionSource(platform=Platform.TELEGRAM, chat_id="1", chat_type="dm")
|
||||
event = _voice_event("/tmp/voice.ogg")
|
||||
|
||||
monkeypatch.setattr(
|
||||
"tools.transcription_tools._load_stt_config",
|
||||
lambda: {"provider": "openai", "openai": {"model": "gpt-4o-mini-transcribe"}},
|
||||
)
|
||||
|
||||
def fake_transcribe(path, model=None):
|
||||
if model == "whisper-1":
|
||||
return {"success": False, "error": "temporary provider error", "provider": "openai"}
|
||||
return {"success": True, "transcript": "[inaudible]", "provider": "openai"}
|
||||
|
||||
with patch("tools.transcription_tools.transcribe_audio", side_effect=fake_transcribe):
|
||||
result = await runner._prepare_inbound_message_text(event=event, source=source, history=[])
|
||||
|
||||
assert "[inaudible]" in result
|
||||
assert "short clarification in Russian" in result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 2. AUDIO file attachment bypasses STT
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@ -17,6 +17,7 @@ def _make_adapter(**extra_env):
|
||||
adapter.platform = Platform.TELEGRAM
|
||||
adapter.config = PlatformConfig(enabled=True, token="fake-token")
|
||||
adapter._bot = AsyncMock()
|
||||
adapter._bot.id = 999
|
||||
adapter._bot.set_message_reaction = AsyncMock()
|
||||
return adapter
|
||||
|
||||
@ -315,3 +316,79 @@ def test_config_reactions_env_takes_precedence(monkeypatch, tmp_path):
|
||||
|
||||
import os
|
||||
assert os.getenv("TELEGRAM_REACTIONS") == "false"
|
||||
|
||||
|
||||
# ── inbound user reactions ───────────────────────────────────────────
|
||||
|
||||
|
||||
def _make_reaction_update(*, emoji="👎", old_emoji=None, user_id=42, bot_id=999):
|
||||
new_reaction = [] if emoji is None else [SimpleNamespace(emoji=emoji)]
|
||||
old_reaction = [] if old_emoji is None else [SimpleNamespace(emoji=old_emoji)]
|
||||
return SimpleNamespace(
|
||||
update_id=777,
|
||||
message_reaction=SimpleNamespace(
|
||||
chat=SimpleNamespace(id=123, type="private", title=None),
|
||||
user=SimpleNamespace(id=user_id, first_name="Anton", username="toxblh"),
|
||||
message_id=456,
|
||||
message_thread_id=None,
|
||||
new_reaction=new_reaction,
|
||||
old_reaction=old_reaction,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_inbound_negative_reaction_becomes_mimo_feedback():
|
||||
adapter = _make_adapter()
|
||||
adapter._is_callback_user_authorized = lambda *args, **kwargs: True
|
||||
adapter.handle_message = AsyncMock()
|
||||
|
||||
await adapter._handle_message_reaction(_make_reaction_update(emoji="👎"), None)
|
||||
|
||||
adapter.handle_message.assert_awaited_once()
|
||||
event = adapter.handle_message.await_args.args[0]
|
||||
assert event.text == (
|
||||
"[Telegram reaction feedback: 👎 on message 456. "
|
||||
"Interpretation: negative feedback / мимо. "
|
||||
"If the target was a cron/news/digest item, use this as taste feedback.]"
|
||||
)
|
||||
assert event.source.chat_id == "123"
|
||||
assert event.source.user_id == "42"
|
||||
assert event.source.message_id == "456"
|
||||
assert event.message_id == "456"
|
||||
assert event.platform_update_id == 777
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_inbound_positive_reaction_becomes_godno_feedback():
|
||||
adapter = _make_adapter()
|
||||
adapter._is_callback_user_authorized = lambda *args, **kwargs: True
|
||||
adapter.handle_message = AsyncMock()
|
||||
|
||||
await adapter._handle_message_reaction(_make_reaction_update(emoji="👌"), None)
|
||||
|
||||
event = adapter.handle_message.await_args.args[0]
|
||||
assert "Telegram reaction feedback: 👌" in event.text
|
||||
assert "Interpretation: positive feedback / годно" in event.text
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_inbound_reaction_removal_is_ignored():
|
||||
adapter = _make_adapter()
|
||||
adapter._is_callback_user_authorized = lambda *args, **kwargs: True
|
||||
adapter.handle_message = AsyncMock()
|
||||
|
||||
await adapter._handle_message_reaction(_make_reaction_update(emoji=None, old_emoji="👎"), None)
|
||||
|
||||
adapter.handle_message.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_inbound_bot_own_reaction_is_ignored():
|
||||
adapter = _make_adapter()
|
||||
adapter._is_callback_user_authorized = lambda *args, **kwargs: True
|
||||
adapter.handle_message = AsyncMock()
|
||||
|
||||
await adapter._handle_message_reaction(_make_reaction_update(emoji="👎", user_id=999), None)
|
||||
|
||||
adapter.handle_message.assert_not_awaited()
|
||||
|
||||
111
tests/gateway/test_telegram_voice_ack.py
Normal file
111
tests/gateway/test_telegram_voice_ack.py
Normal file
@ -0,0 +1,111 @@
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.config import Platform
|
||||
from gateway.platforms.base import MessageEvent, MessageType, SendResult
|
||||
from gateway.session import SessionSource
|
||||
|
||||
|
||||
def _voice_event(thread_id=None) -> MessageEvent:
|
||||
return MessageEvent(
|
||||
text="",
|
||||
message_type=MessageType.VOICE,
|
||||
source=SessionSource(
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_id="123",
|
||||
chat_type="dm",
|
||||
thread_id=thread_id,
|
||||
message_id="42",
|
||||
),
|
||||
message_id="42",
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_pre_stt_ack_is_best_effort_and_platform_side():
|
||||
from gateway.platforms.telegram import TelegramAdapter
|
||||
|
||||
adapter = TelegramAdapter.__new__(TelegramAdapter)
|
||||
adapter.send = AsyncMock(return_value=SendResult(success=True, message_id="99"))
|
||||
|
||||
event = _voice_event()
|
||||
await adapter._send_voice_pre_stt_ack(event)
|
||||
|
||||
adapter.send.assert_awaited_once_with(
|
||||
"123",
|
||||
"🎧 получил, распознаю",
|
||||
reply_to="42",
|
||||
metadata=None,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_pre_stt_ack_failure_does_not_raise():
|
||||
from gateway.platforms.telegram import TelegramAdapter
|
||||
|
||||
adapter = TelegramAdapter.__new__(TelegramAdapter)
|
||||
adapter.send = AsyncMock(side_effect=RuntimeError("telegram is temporarily unavailable"))
|
||||
|
||||
await adapter._send_voice_pre_stt_ack(_voice_event())
|
||||
|
||||
adapter.send.assert_awaited_once()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_voice_handler_sends_ack_before_dispatching_to_gateway():
|
||||
from gateway.platforms.telegram import TelegramAdapter
|
||||
|
||||
adapter = TelegramAdapter.__new__(TelegramAdapter)
|
||||
adapter.platform = Platform.TELEGRAM
|
||||
adapter.config = MagicMock()
|
||||
adapter.config.extra = {}
|
||||
order = []
|
||||
adapter._should_process_message = lambda message: True
|
||||
adapter._apply_telegram_group_observe_attribution = lambda event: event
|
||||
adapter._send_voice_pre_stt_ack = AsyncMock(side_effect=lambda event: order.append("ack"))
|
||||
adapter.handle_message = AsyncMock(side_effect=lambda event: order.append("handle"))
|
||||
|
||||
file_obj = MagicMock()
|
||||
file_obj.download_as_bytearray = AsyncMock(return_value=bytearray(b"ogg-data"))
|
||||
voice = MagicMock()
|
||||
voice.get_file = AsyncMock(return_value=file_obj)
|
||||
|
||||
chat = MagicMock()
|
||||
chat.id = 123
|
||||
chat.type = "private"
|
||||
chat.title = None
|
||||
chat.full_name = "Тохыч"
|
||||
user = MagicMock()
|
||||
user.id = 456
|
||||
user.full_name = "Тохыч"
|
||||
|
||||
msg = MagicMock()
|
||||
msg.chat = chat
|
||||
msg.from_user = user
|
||||
msg.message_id = 42
|
||||
msg.message_thread_id = None
|
||||
msg.is_topic_message = False
|
||||
msg.date = None
|
||||
msg.caption = None
|
||||
msg.text = ""
|
||||
msg.reply_to_message = None
|
||||
msg.sticker = None
|
||||
msg.photo = []
|
||||
msg.video = None
|
||||
msg.audio = None
|
||||
msg.voice = voice
|
||||
msg.document = None
|
||||
msg.media_group_id = None
|
||||
|
||||
update = MagicMock()
|
||||
update.message = msg
|
||||
update.update_id = 777
|
||||
|
||||
with patch("gateway.platforms.telegram.cache_audio_from_bytes", return_value="/tmp/voice.ogg"):
|
||||
await adapter._handle_media_message(update, MagicMock())
|
||||
|
||||
assert order == ["ack", "handle"]
|
||||
handled_event = adapter.handle_message.await_args.args[0]
|
||||
assert handled_event.media_urls == ["/tmp/voice.ogg"]
|
||||
assert handled_event.media_types == ["audio/ogg"]
|
||||
102
tests/plugins/test_semantic_rle_context_engine.py
Normal file
102
tests/plugins/test_semantic_rle_context_engine.py
Normal file
@ -0,0 +1,102 @@
|
||||
"""Tests for the experimental semantic_rle context engine plugin."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from agent.context_engine import ContextEngine
|
||||
from plugins.context_engine import discover_context_engines, load_context_engine
|
||||
from plugins.context_engine.semantic_rle import SemanticRLEEngine
|
||||
|
||||
|
||||
def _messages() -> list[dict[str, str]]:
|
||||
return [
|
||||
{"role": "system", "content": "You are Hermes."},
|
||||
{"role": "user", "content": "server alpha.example is the current deployment target"},
|
||||
{"role": "assistant", "content": "Noted: server alpha.example."},
|
||||
{"role": "user", "content": "decision: use postgres for the ledger"},
|
||||
{"role": "assistant", "content": "I will use postgres."},
|
||||
{"role": "user", "content": "todo: compare misses against baseline"},
|
||||
{"role": "assistant", "content": "Added comparison todo."},
|
||||
{"role": "user", "content": "unresolved question: how often to compact?"},
|
||||
{"role": "assistant", "content": "We can measure compaction cadence."},
|
||||
{"role": "user", "content": "server beta.example is the current deployment target now"},
|
||||
{"role": "assistant", "content": "Switched to beta.example."},
|
||||
{"role": "user", "content": "hot tail user message"},
|
||||
{"role": "assistant", "content": "hot tail assistant message"},
|
||||
]
|
||||
|
||||
|
||||
def test_semantic_rle_satisfies_abc_and_returns_valid_messages():
|
||||
engine = SemanticRLEEngine(hot_tail_messages=4)
|
||||
assert isinstance(engine, ContextEngine)
|
||||
assert engine.name == "semantic_rle"
|
||||
|
||||
result = engine.compress(_messages())
|
||||
|
||||
assert isinstance(result, list)
|
||||
assert all("role" in message and "content" in message for message in result)
|
||||
assert result[0]["role"] == "system"
|
||||
assert any("Semantic RLE context ledger" in message["content"] for message in result)
|
||||
|
||||
|
||||
def test_hot_tail_preserved_verbatim():
|
||||
engine = SemanticRLEEngine(hot_tail_messages=4)
|
||||
messages = _messages()
|
||||
|
||||
result = engine.compress(messages)
|
||||
|
||||
assert result[-4:] == messages[-4:]
|
||||
|
||||
|
||||
def test_server_supersession_marks_old_fact_inactive():
|
||||
engine = SemanticRLEEngine(hot_tail_messages=2)
|
||||
|
||||
result = engine.compress(_messages())
|
||||
summary = "\n".join(str(message["content"]) for message in result if message["role"] == "system")
|
||||
|
||||
assert "server: beta.example" in summary
|
||||
assert "[superseded by beta.example] server: alpha.example" in summary
|
||||
|
||||
|
||||
def test_fake_tokens_and_ip_like_strings_are_redacted_to_refs():
|
||||
engine = SemanticRLEEngine(hot_tail_messages=2)
|
||||
fake_token = "sk-test_abcdefghijklmnopqrstuvwxyz1234567890"
|
||||
fake_pat = "ghp_abcdefghijklmnopqrstuvwxyz1234567890"
|
||||
messages = [
|
||||
{"role": "user", "content": f"api_key={fake_token} server 203.0.113.9"},
|
||||
{"role": "assistant", "content": f"token: {fake_pat}"},
|
||||
{"role": "user", "content": "tail one"},
|
||||
{"role": "assistant", "content": "tail two"},
|
||||
{"role": "user", "content": "tail three"},
|
||||
]
|
||||
|
||||
result = engine.compress(messages)
|
||||
rendered = "\n".join(str(message["content"]) for message in result)
|
||||
|
||||
assert fake_token not in rendered
|
||||
assert fake_pat not in rendered
|
||||
assert "203.0.113.9" not in rendered
|
||||
assert "credential_ref:credential:" in rendered
|
||||
assert "[REDACTED_IP]" in rendered
|
||||
|
||||
|
||||
def test_deterministic_failure_path_returns_original_messages(monkeypatch):
|
||||
engine = SemanticRLEEngine(hot_tail_messages=2)
|
||||
messages = _messages()
|
||||
|
||||
def explode(*args, **kwargs):
|
||||
raise RuntimeError("boom")
|
||||
|
||||
monkeypatch.setattr(engine, "_build_ledger", explode)
|
||||
|
||||
assert engine.compress(messages) == messages
|
||||
|
||||
|
||||
def test_plugin_discovery_and_explicit_load_without_global_activation():
|
||||
discovered = {name: (description, available) for name, description, available in discover_context_engines()}
|
||||
|
||||
assert "semantic_rle" in discovered
|
||||
assert discovered["semantic_rle"][1] is True
|
||||
|
||||
engine = load_context_engine("semantic_rle")
|
||||
assert isinstance(engine, SemanticRLEEngine)
|
||||
assert engine.name == "semantic_rle"
|
||||
39
tests/scripts/test_semantic_rle_eval.py
Normal file
39
tests/scripts/test_semantic_rle_eval.py
Normal file
@ -0,0 +1,39 @@
|
||||
"""Tests for scripts/semantic_rle_eval.py."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib.util
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _load_eval_module():
|
||||
path = Path(__file__).resolve().parents[2] / "scripts" / "semantic_rle_eval.py"
|
||||
spec = importlib.util.spec_from_file_location("semantic_rle_eval", path)
|
||||
assert spec and spec.loader
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
sys.modules["semantic_rle_eval"] = module
|
||||
spec.loader.exec_module(module)
|
||||
return module
|
||||
|
||||
|
||||
def test_semantic_rle_eval_reports_baseline_misses_and_semantic_passes():
|
||||
module = _load_eval_module()
|
||||
|
||||
report = module.run_eval()
|
||||
|
||||
assert report["summary"]["semantic_rle"] == {"passed": 12, "total": 12}
|
||||
assert report["summary"]["hot_tail_only_baseline"]["passed"] < 12
|
||||
|
||||
semantic_rows = [row for row in report["results"] if row["engine"] == "semantic_rle"]
|
||||
assert all(row["misses"] == [] for row in semantic_rows)
|
||||
|
||||
baseline_misses = {
|
||||
miss
|
||||
for row in report["results"]
|
||||
if row["engine"] == "hot_tail_only_baseline"
|
||||
for miss in row["misses"]
|
||||
}
|
||||
assert "current_fact_retained" in baseline_misses
|
||||
assert "obligation_retained" in baseline_misses
|
||||
assert "credential_ref_present" in baseline_misses
|
||||
Loading…
Reference in New Issue
Block a user