fix(credential-pool): correct pool rotation when weekly usage limit is reached
After key #1 was marked exhausted, the retry still called the API with key #1 because of env-var bias in _get_cached_client / resolve_api_key_provider_credentials. Fix: peek the pool and pass the active entry's key as explicit_api_key. Secondary: api_key_hint in mark_exhausted_and_rotate pins the correct entry under concurrent CLI+gateway calls; recover_with_credential_pool treats a GoUsageLimitError reason as a usage-limit condition, and _is_payment_error matches "weekly usage limit" / "weekly limit" messages; extract_api_error_context parses "Resets in Xhr Ymin".
This commit is contained in:
parent
8f19485f53
commit
4117fc3645
@ -612,6 +612,8 @@ def recover_with_credential_pool(
|
|||||||
context_message = str(error_context.get("message") or "").lower()
|
context_message = str(error_context.get("message") or "").lower()
|
||||||
usage_limit_reached = (
|
usage_limit_reached = (
|
||||||
"usage_limit_reached" in context_reason
|
"usage_limit_reached" in context_reason
|
||||||
|
or "gousagelimit" in context_reason
|
||||||
|
or "usage limit reached" in context_message
|
||||||
or "usage limit has been reached" in context_message
|
or "usage limit has been reached" in context_message
|
||||||
)
|
)
|
||||||
if not has_retried_429 and not usage_limit_reached:
|
if not has_retried_429 and not usage_limit_reached:
|
||||||
@ -2090,19 +2092,33 @@ def extract_api_error_context(error: Exception) -> Dict[str, Any]:
|
|||||||
if "reset_at" not in context:
|
if "reset_at" not in context:
|
||||||
message = context.get("message") or ""
|
message = context.get("message") or ""
|
||||||
if isinstance(message, str):
|
if isinstance(message, str):
|
||||||
delay_match = re.search(r"quotaResetDelay[:\s\"]+(\\d+(?:\\.\\d+)?)(ms|s)", message, re.IGNORECASE)
|
delay_match = re.search(r"quotaResetDelay[:\s\"]+(\d+(?:\.\d+)?)(ms|s)", message, re.IGNORECASE)
|
||||||
if delay_match:
|
if delay_match:
|
||||||
value = float(delay_match.group(1))
|
value = float(delay_match.group(1))
|
||||||
seconds = value / 1000.0 if delay_match.group(2).lower() == "ms" else value
|
seconds = value / 1000.0 if delay_match.group(2).lower() == "ms" else value
|
||||||
context["reset_at"] = time.time() + seconds
|
context["reset_at"] = time.time() + seconds
|
||||||
else:
|
else:
|
||||||
sec_match = re.search(
|
resets_in_match = re.search(
|
||||||
r"retry\s+(?:after\s+)?(\d+(?:\.\d+)?)\s*(?:sec|secs|seconds|s\b)",
|
r"resets?\s+in\s+"
|
||||||
|
r"(?:(\d+(?:\.\d+)?)\s*(?:h|hr|hrs|hour|hours)\b\s*)?"
|
||||||
|
r"(?:(\d+(?:\.\d+)?)\s*(?:m|min|mins|minute|minutes)\b\s*)?"
|
||||||
|
r"(?:(\d+(?:\.\d+)?)\s*(?:s|sec|secs|second|seconds)\b)?",
|
||||||
message,
|
message,
|
||||||
re.IGNORECASE,
|
re.IGNORECASE,
|
||||||
)
|
)
|
||||||
if sec_match:
|
if resets_in_match and any(resets_in_match.groups()):
|
||||||
context["reset_at"] = time.time() + float(sec_match.group(1))
|
hours = float(resets_in_match.group(1) or 0)
|
||||||
|
minutes = float(resets_in_match.group(2) or 0)
|
||||||
|
seconds = float(resets_in_match.group(3) or 0)
|
||||||
|
context["reset_at"] = time.time() + (hours * 3600) + (minutes * 60) + seconds
|
||||||
|
else:
|
||||||
|
sec_match = re.search(
|
||||||
|
r"retry\s+(?:after\s+)?(\d+(?:\.\d+)?)\s*(?:sec|secs|seconds|s\b)",
|
||||||
|
message,
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
if sec_match:
|
||||||
|
context["reset_at"] = time.time() + float(sec_match.group(1))
|
||||||
|
|
||||||
return context
|
return context
|
||||||
|
|
||||||
|
|||||||
@ -1406,6 +1406,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
|||||||
for provider_id, pconfig in PROVIDER_REGISTRY.items():
|
for provider_id, pconfig in PROVIDER_REGISTRY.items():
|
||||||
if pconfig.auth_type != "api_key":
|
if pconfig.auth_type != "api_key":
|
||||||
continue
|
continue
|
||||||
|
if _is_provider_unhealthy(provider_id):
|
||||||
|
logger.debug("Auxiliary api-key chain: %s is unhealthy, skipping", provider_id)
|
||||||
|
continue
|
||||||
if provider_id == "anthropic":
|
if provider_id == "anthropic":
|
||||||
# Only try anthropic when the user has explicitly configured it.
|
# Only try anthropic when the user has explicitly configured it.
|
||||||
# Without this gate, Claude Code credentials get silently used
|
# Without this gate, Claude Code credentials get silently used
|
||||||
@ -2260,11 +2263,12 @@ def _is_payment_error(exc: Exception) -> bool:
|
|||||||
"credits", "insufficient funds",
|
"credits", "insufficient funds",
|
||||||
"can only afford", "billing",
|
"can only afford", "billing",
|
||||||
"payment required",
|
"payment required",
|
||||||
# Daily / monthly quota exhaustion keywords
|
# Daily / monthly / weekly quota exhaustion keywords
|
||||||
"quota exceeded", "quota_exceeded",
|
"quota exceeded", "quota_exceeded",
|
||||||
"too many tokens per day", "daily limit",
|
"too many tokens per day", "daily limit",
|
||||||
"tokens per day", "daily quota",
|
"tokens per day", "daily quota",
|
||||||
"resource exhausted", # Vertex AI / gRPC quota errors
|
"resource exhausted", # Vertex AI / gRPC quota errors
|
||||||
|
"weekly usage limit", "weekly limit", # OpenCode Go weekly subscription cap
|
||||||
)):
|
)):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
@ -2478,7 +2482,11 @@ def _pool_error_context(exc: Exception) -> Dict[str, Any]:
|
|||||||
return payload
|
return payload
|
||||||
|
|
||||||
|
|
||||||
def _recoverable_pool_provider(resolved_provider: str, client: Any) -> Optional[str]:
|
def _recoverable_pool_provider(
|
||||||
|
resolved_provider: str,
|
||||||
|
client: Any,
|
||||||
|
main_runtime: Optional[Dict[str, Any]] = None,
|
||||||
|
) -> Optional[str]:
|
||||||
"""Infer which provider pool can recover the current auxiliary client."""
|
"""Infer which provider pool can recover the current auxiliary client."""
|
||||||
normalized = _normalize_aux_provider(resolved_provider)
|
normalized = _normalize_aux_provider(resolved_provider)
|
||||||
if normalized not in {"", "auto", "custom"}:
|
if normalized not in {"", "auto", "custom"}:
|
||||||
@ -2496,11 +2504,33 @@ def _recoverable_pool_provider(resolved_provider: str, client: Any) -> Optional[
|
|||||||
return "copilot"
|
return "copilot"
|
||||||
if base_url_host_matches(base, "api.kimi.com"):
|
if base_url_host_matches(base, "api.kimi.com"):
|
||||||
return "kimi-coding"
|
return "kimi-coding"
|
||||||
|
# For api_key providers not in the hardcoded list (e.g. opencode-go), match
|
||||||
|
# the client base URL against all registered api_key providers so that
|
||||||
|
# credential-pool rotation works for any provider the user configured.
|
||||||
|
if main_runtime:
|
||||||
|
rt = _normalize_main_runtime(main_runtime)
|
||||||
|
rt_provider = rt.get("provider", "")
|
||||||
|
if rt_provider and rt_provider not in {"", "auto", "custom"}:
|
||||||
|
try:
|
||||||
|
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||||
|
pconfig = PROVIDER_REGISTRY.get(rt_provider)
|
||||||
|
if pconfig and getattr(pconfig, "auth_type", None) == "api_key":
|
||||||
|
rt_base = str(getattr(pconfig, "inference_base_url", "") or "").rstrip("/")
|
||||||
|
if rt_base and base_url_host_matches(base, base_url_hostname(rt_base)):
|
||||||
|
return rt_provider
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _recover_provider_pool(provider: str, exc: Exception) -> bool:
|
def _recover_provider_pool(provider: str, exc: Exception, *, failed_api_key: str = "") -> bool:
|
||||||
"""Try same-provider credential-pool recovery for auxiliary calls."""
|
"""Try same-provider credential-pool recovery for auxiliary calls.
|
||||||
|
|
||||||
|
``failed_api_key`` is the API key that was actually used for the failing
|
||||||
|
request. Passing it lets mark_exhausted_and_rotate identify the correct
|
||||||
|
pool entry even when another process has already rotated the pool (which
|
||||||
|
would leave current() as None, causing the wrong entry to be marked).
|
||||||
|
"""
|
||||||
normalized = _normalize_aux_provider(provider)
|
normalized = _normalize_aux_provider(provider)
|
||||||
try:
|
try:
|
||||||
pool = load_pool(normalized)
|
pool = load_pool(normalized)
|
||||||
@ -2512,6 +2542,7 @@ def _recover_provider_pool(provider: str, exc: Exception) -> bool:
|
|||||||
|
|
||||||
status_code = getattr(exc, "status_code", None)
|
status_code = getattr(exc, "status_code", None)
|
||||||
error_context = _pool_error_context(exc)
|
error_context = _pool_error_context(exc)
|
||||||
|
hint = failed_api_key or None
|
||||||
|
|
||||||
if _is_auth_error(exc):
|
if _is_auth_error(exc):
|
||||||
refreshed = pool.try_refresh_current()
|
refreshed = pool.try_refresh_current()
|
||||||
@ -2521,6 +2552,7 @@ def _recover_provider_pool(provider: str, exc: Exception) -> bool:
|
|||||||
next_entry = pool.mark_exhausted_and_rotate(
|
next_entry = pool.mark_exhausted_and_rotate(
|
||||||
status_code=status_code if status_code is not None else 401,
|
status_code=status_code if status_code is not None else 401,
|
||||||
error_context=error_context,
|
error_context=error_context,
|
||||||
|
api_key_hint=hint,
|
||||||
)
|
)
|
||||||
if next_entry is not None:
|
if next_entry is not None:
|
||||||
_evict_cached_clients(normalized)
|
_evict_cached_clients(normalized)
|
||||||
@ -2532,6 +2564,7 @@ def _recover_provider_pool(provider: str, exc: Exception) -> bool:
|
|||||||
next_entry = pool.mark_exhausted_and_rotate(
|
next_entry = pool.mark_exhausted_and_rotate(
|
||||||
status_code=status_code if status_code is not None else fallback_status,
|
status_code=status_code if status_code is not None else fallback_status,
|
||||||
error_context=error_context,
|
error_context=error_context,
|
||||||
|
api_key_hint=hint,
|
||||||
)
|
)
|
||||||
if next_entry is not None:
|
if next_entry is not None:
|
||||||
_evict_cached_clients(normalized)
|
_evict_cached_clients(normalized)
|
||||||
@ -2936,6 +2969,11 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
|
|||||||
resolved_provider = "custom"
|
resolved_provider = "custom"
|
||||||
explicit_base_url = runtime_base_url
|
explicit_base_url = runtime_base_url
|
||||||
explicit_api_key = runtime_api_key or None
|
explicit_api_key = runtime_api_key or None
|
||||||
|
elif runtime_api_key:
|
||||||
|
# Pin auxiliary to the same api_key as the active main chat session
|
||||||
|
# so that a working key is reused instead of re-selecting from the pool
|
||||||
|
# (which might pick a different, potentially exhausted key).
|
||||||
|
explicit_api_key = runtime_api_key
|
||||||
# Skip Step-1 if the main provider was recently 402'd. The unhealthy
|
# Skip Step-1 if the main provider was recently 402'd. The unhealthy
|
||||||
# cache TTL bounds how long we bypass it, so a topped-up account
|
# cache TTL bounds how long we bypass it, so a topped-up account
|
||||||
# recovers automatically. If we tried Step-1 anyway, every aux call
|
# recovers automatically. If we tried Step-1 anyway, every aux call
|
||||||
@ -4328,13 +4366,25 @@ def _get_cached_client(
|
|||||||
else:
|
else:
|
||||||
effective = _compat_model(cached_client, model, cached_default)
|
effective = _compat_model(cached_client, model, cached_default)
|
||||||
return cached_client, effective
|
return cached_client, effective
|
||||||
# Build outside the lock
|
# Build outside the lock.
|
||||||
|
# For pool-backed api_key providers, derive the active API key from the
|
||||||
|
# pool entry rather than from env vars. resolve_api_key_provider_credentials
|
||||||
|
# always prefers env vars (first-entry bias), which bypasses pool rotation:
|
||||||
|
# after key #1 is marked exhausted the retry would still get key #1 from
|
||||||
|
# the env var and fail again, causing the retry2_err handler to mark key #2.
|
||||||
|
effective_api_key = api_key
|
||||||
|
if not effective_api_key:
|
||||||
|
_pe = _peek_pool_entry(_normalize_aux_provider(provider))
|
||||||
|
if _pe is not None:
|
||||||
|
_pk = _pool_runtime_api_key(_pe)
|
||||||
|
if _pk:
|
||||||
|
effective_api_key = _pk
|
||||||
client, default_model = resolve_provider_client(
|
client, default_model = resolve_provider_client(
|
||||||
provider,
|
provider,
|
||||||
model,
|
model,
|
||||||
async_mode,
|
async_mode,
|
||||||
explicit_base_url=base_url,
|
explicit_base_url=base_url,
|
||||||
explicit_api_key=api_key,
|
explicit_api_key=effective_api_key,
|
||||||
api_mode=api_mode,
|
api_mode=api_mode,
|
||||||
main_runtime=runtime,
|
main_runtime=runtime,
|
||||||
is_vision=is_vision,
|
is_vision=is_vision,
|
||||||
@ -4948,10 +4998,17 @@ def call_llm(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# ── Same-provider credential-pool recovery ─────────────────────
|
# ── Same-provider credential-pool recovery ─────────────────────
|
||||||
pool_provider = _recoverable_pool_provider(resolved_provider, client)
|
pool_provider = _recoverable_pool_provider(resolved_provider, client, main_runtime=main_runtime)
|
||||||
|
# Capture the exact API key used so mark_exhausted_and_rotate can find
|
||||||
|
# the correct pool entry even when another process rotated the pool
|
||||||
|
# between this call and recovery (which leaves current()=None and makes
|
||||||
|
# _select_unlocked() return the NEXT key by mistake).
|
||||||
|
_client_api_key = str(getattr(client, "api_key", "") or "")
|
||||||
if pool_provider and (_is_auth_error(first_err) or _is_payment_error(first_err) or _is_rate_limit_error(first_err)):
|
if pool_provider and (_is_auth_error(first_err) or _is_payment_error(first_err) or _is_rate_limit_error(first_err)):
|
||||||
recovery_err = first_err
|
recovery_err = first_err
|
||||||
if _is_rate_limit_error(first_err):
|
# Skip the extra retry for clear payment/quota errors — the endpoint
|
||||||
|
# won't accept another request with the same exhausted key.
|
||||||
|
if _is_rate_limit_error(first_err) and not _is_payment_error(first_err):
|
||||||
try:
|
try:
|
||||||
return _validate_llm_response(
|
return _validate_llm_response(
|
||||||
client.chat.completions.create(**kwargs), task)
|
client.chat.completions.create(**kwargs), task)
|
||||||
@ -4959,27 +5016,40 @@ def call_llm(
|
|||||||
if not (_is_auth_error(retry_err) or _is_payment_error(retry_err) or _is_rate_limit_error(retry_err)):
|
if not (_is_auth_error(retry_err) or _is_payment_error(retry_err) or _is_rate_limit_error(retry_err)):
|
||||||
raise
|
raise
|
||||||
recovery_err = retry_err
|
recovery_err = retry_err
|
||||||
if _recover_provider_pool(pool_provider, recovery_err):
|
if _recover_provider_pool(pool_provider, recovery_err, failed_api_key=_client_api_key):
|
||||||
logger.info(
|
logger.info(
|
||||||
"Auxiliary %s: recovered %s via credential-pool rotation after %s",
|
"Auxiliary %s: recovered %s via credential-pool rotation after %s",
|
||||||
task or "call", pool_provider, type(recovery_err).__name__,
|
task or "call", pool_provider, type(recovery_err).__name__,
|
||||||
)
|
)
|
||||||
return _retry_same_provider_sync(
|
try:
|
||||||
task=task,
|
return _retry_same_provider_sync(
|
||||||
resolved_provider=resolved_provider,
|
task=task,
|
||||||
resolved_model=resolved_model,
|
resolved_provider=resolved_provider,
|
||||||
resolved_base_url=resolved_base_url,
|
resolved_model=resolved_model,
|
||||||
resolved_api_key=resolved_api_key,
|
resolved_base_url=resolved_base_url,
|
||||||
resolved_api_mode=resolved_api_mode,
|
resolved_api_key=resolved_api_key,
|
||||||
main_runtime=main_runtime,
|
resolved_api_mode=resolved_api_mode,
|
||||||
final_model=final_model,
|
main_runtime=main_runtime,
|
||||||
messages=messages,
|
final_model=final_model,
|
||||||
temperature=temperature,
|
messages=messages,
|
||||||
max_tokens=max_tokens,
|
temperature=temperature,
|
||||||
tools=tools,
|
max_tokens=max_tokens,
|
||||||
effective_timeout=effective_timeout,
|
tools=tools,
|
||||||
effective_extra_body=effective_extra_body,
|
effective_timeout=effective_timeout,
|
||||||
)
|
effective_extra_body=effective_extra_body,
|
||||||
|
)
|
||||||
|
except Exception as retry2_err:
|
||||||
|
# The rotated key also hit a quota/auth wall. Mark it
|
||||||
|
# immediately so concurrent processes don't make a
|
||||||
|
# redundant API call to discover it's exhausted too.
|
||||||
|
# Then fall through to the payment fallback below so
|
||||||
|
# alternative providers can still serve the request.
|
||||||
|
if (_is_payment_error(retry2_err) or _is_auth_error(retry2_err)
|
||||||
|
or _is_rate_limit_error(retry2_err)):
|
||||||
|
_recover_provider_pool(pool_provider, retry2_err)
|
||||||
|
first_err = retry2_err
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
|
||||||
# ── Payment / credit exhaustion fallback ──────────────────────
|
# ── Payment / credit exhaustion fallback ──────────────────────
|
||||||
# When the resolved provider returns 402 or a credit-related error,
|
# When the resolved provider returns 402 or a credit-related error,
|
||||||
@ -5021,7 +5091,7 @@ def call_llm(
|
|||||||
# 402). Mark THAT label unhealthy so subsequent aux calls
|
# 402). Mark THAT label unhealthy so subsequent aux calls
|
||||||
# skip it instead of paying another doomed RTT.
|
# skip it instead of paying another doomed RTT.
|
||||||
_mark_provider_unhealthy(
|
_mark_provider_unhealthy(
|
||||||
_recoverable_pool_provider(resolved_provider, client) or resolved_provider
|
_recoverable_pool_provider(resolved_provider, client, main_runtime=main_runtime) or resolved_provider
|
||||||
)
|
)
|
||||||
elif _is_rate_limit_error(first_err):
|
elif _is_rate_limit_error(first_err):
|
||||||
reason = "rate limit"
|
reason = "rate limit"
|
||||||
@ -5141,6 +5211,7 @@ async def async_call_llm(
|
|||||||
model: str = None,
|
model: str = None,
|
||||||
base_url: str = None,
|
base_url: str = None,
|
||||||
api_key: str = None,
|
api_key: str = None,
|
||||||
|
main_runtime: Optional[Dict[str, Any]] = None,
|
||||||
messages: list,
|
messages: list,
|
||||||
temperature: float = None,
|
temperature: float = None,
|
||||||
max_tokens: int = None,
|
max_tokens: int = None,
|
||||||
@ -5327,10 +5398,13 @@ async def async_call_llm(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# ── Same-provider credential-pool recovery (mirrors sync) ─────
|
# ── Same-provider credential-pool recovery (mirrors sync) ─────
|
||||||
pool_provider = _recoverable_pool_provider(resolved_provider, client)
|
pool_provider = _recoverable_pool_provider(resolved_provider, client, main_runtime=main_runtime)
|
||||||
|
_client_api_key = str(getattr(client, "api_key", "") or "")
|
||||||
if pool_provider and (_is_auth_error(first_err) or _is_payment_error(first_err) or _is_rate_limit_error(first_err)):
|
if pool_provider and (_is_auth_error(first_err) or _is_payment_error(first_err) or _is_rate_limit_error(first_err)):
|
||||||
recovery_err = first_err
|
recovery_err = first_err
|
||||||
if _is_rate_limit_error(first_err):
|
# Skip the extra retry for clear payment/quota errors — the endpoint
|
||||||
|
# won't accept another request with the same exhausted key.
|
||||||
|
if _is_rate_limit_error(first_err) and not _is_payment_error(first_err):
|
||||||
try:
|
try:
|
||||||
return _validate_llm_response(
|
return _validate_llm_response(
|
||||||
await client.chat.completions.create(**kwargs), task)
|
await client.chat.completions.create(**kwargs), task)
|
||||||
@ -5338,26 +5412,34 @@ async def async_call_llm(
|
|||||||
if not (_is_auth_error(retry_err) or _is_payment_error(retry_err) or _is_rate_limit_error(retry_err)):
|
if not (_is_auth_error(retry_err) or _is_payment_error(retry_err) or _is_rate_limit_error(retry_err)):
|
||||||
raise
|
raise
|
||||||
recovery_err = retry_err
|
recovery_err = retry_err
|
||||||
if _recover_provider_pool(pool_provider, recovery_err):
|
if _recover_provider_pool(pool_provider, recovery_err, failed_api_key=_client_api_key):
|
||||||
logger.info(
|
logger.info(
|
||||||
"Auxiliary %s (async): recovered %s via credential-pool rotation after %s",
|
"Auxiliary %s (async): recovered %s via credential-pool rotation after %s",
|
||||||
task or "call", pool_provider, type(recovery_err).__name__,
|
task or "call", pool_provider, type(recovery_err).__name__,
|
||||||
)
|
)
|
||||||
return await _retry_same_provider_async(
|
try:
|
||||||
task=task,
|
return await _retry_same_provider_async(
|
||||||
resolved_provider=resolved_provider,
|
task=task,
|
||||||
resolved_model=resolved_model,
|
resolved_provider=resolved_provider,
|
||||||
resolved_base_url=resolved_base_url,
|
resolved_model=resolved_model,
|
||||||
resolved_api_key=resolved_api_key,
|
resolved_base_url=resolved_base_url,
|
||||||
resolved_api_mode=resolved_api_mode,
|
resolved_api_key=resolved_api_key,
|
||||||
final_model=final_model,
|
resolved_api_mode=resolved_api_mode,
|
||||||
messages=messages,
|
final_model=final_model,
|
||||||
temperature=temperature,
|
messages=messages,
|
||||||
max_tokens=max_tokens,
|
temperature=temperature,
|
||||||
tools=tools,
|
max_tokens=max_tokens,
|
||||||
effective_timeout=effective_timeout,
|
tools=tools,
|
||||||
effective_extra_body=effective_extra_body,
|
effective_timeout=effective_timeout,
|
||||||
)
|
effective_extra_body=effective_extra_body,
|
||||||
|
)
|
||||||
|
except Exception as retry2_err:
|
||||||
|
if (_is_payment_error(retry2_err) or _is_auth_error(retry2_err)
|
||||||
|
or _is_rate_limit_error(retry2_err)):
|
||||||
|
_recover_provider_pool(pool_provider, retry2_err)
|
||||||
|
first_err = retry2_err
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
|
||||||
# ── Payment / connection / rate-limit fallback (mirrors sync call_llm) ──
|
# ── Payment / connection / rate-limit fallback (mirrors sync call_llm) ──
|
||||||
should_fallback = (
|
should_fallback = (
|
||||||
|
|||||||
@ -249,6 +249,16 @@ def _extract_retry_delay_seconds(message: str) -> Optional[float]:
|
|||||||
sec_match = re.search(r"retry\s+(?:after\s+)?(\d+(?:\.\d+)?)\s*(?:sec|secs|seconds|s\b)", message, re.IGNORECASE)
|
sec_match = re.search(r"retry\s+(?:after\s+)?(\d+(?:\.\d+)?)\s*(?:sec|secs|seconds|s\b)", message, re.IGNORECASE)
|
||||||
if sec_match:
|
if sec_match:
|
||||||
return float(sec_match.group(1))
|
return float(sec_match.group(1))
|
||||||
|
# "Resets in 4hr 5min" format used by OpenCode Go weekly usage limits
|
||||||
|
hr_min_match = re.search(r"resets?\s+in\s+(\d+)\s*hr\s+(\d+)\s*min", message, re.IGNORECASE)
|
||||||
|
if hr_min_match:
|
||||||
|
return int(hr_min_match.group(1)) * 3600 + int(hr_min_match.group(2)) * 60
|
||||||
|
hr_only_match = re.search(r"resets?\s+in\s+(\d+)\s*hr\b", message, re.IGNORECASE)
|
||||||
|
if hr_only_match:
|
||||||
|
return int(hr_only_match.group(1)) * 3600
|
||||||
|
min_only_match = re.search(r"resets?\s+in\s+(\d+)\s*min\b", message, re.IGNORECASE)
|
||||||
|
if min_only_match:
|
||||||
|
return int(min_only_match.group(1)) * 60
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
@ -1265,9 +1275,21 @@ class CredentialPool:
|
|||||||
*,
|
*,
|
||||||
status_code: Optional[int],
|
status_code: Optional[int],
|
||||||
error_context: Optional[Dict[str, Any]] = None,
|
error_context: Optional[Dict[str, Any]] = None,
|
||||||
|
api_key_hint: Optional[str] = None,
|
||||||
) -> Optional[PooledCredential]:
|
) -> Optional[PooledCredential]:
|
||||||
with self._lock:
|
with self._lock:
|
||||||
entry = self.current() or self._select_unlocked()
|
entry = None
|
||||||
|
if api_key_hint:
|
||||||
|
# Prefer the specific entry whose API key matches the one that
|
||||||
|
# actually failed. When this pool was freshly loaded from disk
|
||||||
|
# (another process already rotated), current() is None and
|
||||||
|
# _select_unlocked() would return the NEXT key — the wrong one.
|
||||||
|
entry = next(
|
||||||
|
(e for e in self._entries if e.runtime_api_key == api_key_hint),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
if entry is None:
|
||||||
|
entry = self.current() or self._select_unlocked()
|
||||||
if entry is None:
|
if entry is None:
|
||||||
return None
|
return None
|
||||||
_label = entry.label or entry.id[:8]
|
_label = entry.label or entry.id[:8]
|
||||||
|
|||||||
@ -4089,6 +4089,25 @@ class TestCredentialPoolRecovery:
|
|||||||
assert context["reason"] == "usage_limit_reached"
|
assert context["reason"] == "usage_limit_reached"
|
||||||
assert context["message"] == "The usage limit has been reached"
|
assert context["message"] == "The usage limit has been reached"
|
||||||
|
|
||||||
|
def test_extract_api_error_context_parses_resets_in_hours_and_minutes(self, agent, monkeypatch):
|
||||||
|
from agent import agent_runtime_helpers
|
||||||
|
|
||||||
|
monkeypatch.setattr(agent_runtime_helpers.time, "time", lambda: 1_000.0)
|
||||||
|
error = SimpleNamespace(
|
||||||
|
body={
|
||||||
|
"error": {
|
||||||
|
"type": "GoUsageLimitError",
|
||||||
|
"message": "Weekly usage limit reached. Resets in 6hr 29min.",
|
||||||
|
}
|
||||||
|
},
|
||||||
|
response=SimpleNamespace(headers={}),
|
||||||
|
)
|
||||||
|
|
||||||
|
context = agent._extract_api_error_context(error)
|
||||||
|
|
||||||
|
assert context["reason"] == "GoUsageLimitError"
|
||||||
|
assert context["reset_at"] == 1_000.0 + (6 * 60 * 60) + (29 * 60)
|
||||||
|
|
||||||
def test_recover_with_pool_passes_error_context_on_rotated_429(self, agent):
|
def test_recover_with_pool_passes_error_context_on_rotated_429(self, agent):
|
||||||
next_entry = SimpleNamespace(label="secondary")
|
next_entry = SimpleNamespace(label="secondary")
|
||||||
captured = {}
|
captured = {}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user