def _endpoint_speaks_anthropic_messages(base_url: Optional[str]) -> bool:
    """Return True if ``base_url`` speaks the Anthropic Messages protocol.

    The alternative is the OpenAI ``chat.completions`` wire format.  The
    rules are intended to mirror
    ``hermes_cli.runtime_provider._detect_api_mode_for_url`` so the auxiliary
    client and the main agent agree on transport selection.

    Recognised endpoints:

    - Any URL ending in ``/anthropic`` (MiniMax, Zhipu GLM, LiteLLM proxies,
      Anthropic-compatible gateways).
    - ``api.kimi.com`` with ``/coding`` in the URL (Kimi Coding Plan).
    - ``api.anthropic.com`` (native Anthropic).

    Args:
        base_url: Endpoint URL.  ``None`` and blank strings are accepted and
            treated as "not Anthropic".

    Returns:
        ``True`` when the endpoint should be driven with the Anthropic
        Messages transport, ``False`` otherwise.
    """
    # Case, surrounding whitespace and trailing slashes must not affect the
    # match, so normalise once up front.
    normalized = (base_url or "").strip().lower().rstrip("/")
    if not normalized:
        return False
    if normalized.endswith("/anthropic"):
        return True
    hostname = base_url_hostname(normalized)
    if hostname == "api.anthropic.com":
        return True
    # Only the /coding route on api.kimi.com is treated as Anthropic-wire.
    return hostname == "api.kimi.com" and "/coding" in normalized


def _maybe_wrap_anthropic(
    client_obj: Any,
    model: str,
    api_key: str,
    base_url: str,
    api_mode: Optional[str] = None,
) -> Any:
    """Rewrap a plain OpenAI client in ``AnthropicAuxiliaryClient`` when the
    endpoint actually speaks Anthropic Messages.

    This is the single chokepoint for aux-client transport correction, so
    api_key providers, the ``custom`` endpoint and ``/anthropic`` gateways
    all land on the right wire format regardless of which branch built the
    client.

    Args:
        client_obj: The client built by the caller (normally a plain OpenAI
            client, possibly an already-specialised wrapper).
        model: Model name forwarded to the Anthropic wrapper.
        api_key: Credential for the endpoint.  When empty, the key is
            recovered from ``client_obj.api_key`` if that is a non-blank
            string, so call sites that omit the key still authenticate.
        base_url: Endpoint URL used for auto-detection and client creation.
        api_mode: Explicit transport override.  ``"anthropic_messages"``
            forces wrapping; any other non-empty value disables it.

    Returns:
        ``client_obj`` unchanged when:

        - it is already an Anthropic/Codex/Gemini/CopilotACP wrapper;
        - ``api_mode`` is explicitly set to a non-Anthropic transport;
        - the endpoint is an OpenAI-wire endpoint;
        - the Anthropic adapter cannot be imported or fails to build a
          client (a warning is logged in both cases).

        Otherwise a new ``AnthropicAuxiliaryClient`` for the endpoint.
    """
    # Already-specialised local wrappers: never double-wrap or re-dispatch.
    if isinstance(client_obj, (AnthropicAuxiliaryClient, CodexAuxiliaryClient)):
        return client_obj

    # Optional adapters are imported lazily; a missing module simply means
    # the client cannot be an instance of that adapter.
    try:
        from agent.gemini_native_adapter import GeminiNativeClient
    except ImportError:
        pass
    else:
        if isinstance(client_obj, GeminiNativeClient):
            return client_obj
    try:
        from agent.copilot_acp_client import CopilotACPClient
    except ImportError:
        pass
    else:
        if isinstance(client_obj, CopilotACPClient):
            return client_obj

    # Explicit non-anthropic api_mode wins over URL heuristics.
    if api_mode and api_mode != "anthropic_messages":
        return client_obj

    should_wrap = (
        api_mode == "anthropic_messages"
        or _endpoint_speaks_anthropic_messages(base_url)
    )
    if not should_wrap:
        return client_obj

    try:
        from agent.anthropic_adapter import build_anthropic_client
    except ImportError:
        logger.warning(
            "Endpoint %s speaks Anthropic Messages but the anthropic SDK is "
            "not installed — falling back to OpenAI-wire (will likely 404).",
            base_url,
        )
        return client_obj

    if not api_key:
        # Callers may not have the key at hand; reuse the credential the
        # wrapped client was constructed with rather than building an
        # unauthenticated Anthropic client.
        recovered_key = getattr(client_obj, "api_key", None)
        if isinstance(recovered_key, str) and recovered_key.strip():
            api_key = recovered_key

    try:
        real_client = build_anthropic_client(api_key, base_url)
    except Exception as exc:
        # Best-effort boundary: any construction failure degrades to the
        # client the caller already had instead of aborting resolution.
        logger.warning(
            "Failed to build Anthropic client for %s (%s) — falling back to "
            "OpenAI-wire client.", base_url, exc,
        )
        return client_obj

    logger.debug(
        "Auxiliary transport: wrapping client in AnthropicAuxiliaryClient "
        "(model=%s, base_url=%s, api_mode=%s)",
        model, base_url[:60] if base_url else "", api_mode or "auto-detected",
    )
    return AnthropicAuxiliaryClient(
        real_client, model, api_key, base_url, is_oauth=False,
    )
_read_nous_auth() -> Optional[dict]: """Read and validate ~/.hermes/auth.json for an active Nous provider. @@ -914,7 +1024,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: from hermes_cli.models import copilot_default_headers extra["default_headers"] = copilot_default_headers() - return OpenAI(api_key=api_key, base_url=base_url, **extra), model + _client = OpenAI(api_key=api_key, base_url=base_url, **extra) + _client = _maybe_wrap_anthropic(_client, model, api_key, base_url) + return _client, model creds = resolve_api_key_provider_credentials(provider_id) api_key = str(creds.get("api_key", "")).strip() @@ -940,7 +1052,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: from hermes_cli.models import copilot_default_headers extra["default_headers"] = copilot_default_headers() - return OpenAI(api_key=api_key, base_url=base_url, **extra), model + _client = OpenAI(api_key=api_key, base_url=base_url, **extra) + _client = _maybe_wrap_anthropic(_client, model, api_key, base_url) + return _client, model return None, None @@ -1224,7 +1338,13 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]: AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False), model, ) - return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model + # URL-based anthropic detection for custom endpoints that didn't set + # api_mode explicitly (e.g. kimi.com/coding reached via custom config). 
+ _fallback_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra) + _fallback_client = _maybe_wrap_anthropic( + _fallback_client, model, custom_key, custom_base, custom_mode, + ) + return _fallback_client, model def _try_codex() -> Tuple[Optional[Any], Optional[str]]: @@ -1775,8 +1895,20 @@ def resolve_provider_client( return True return False - def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""): - """Wrap a plain OpenAI client in CodexAuxiliaryClient if Responses API is needed.""" + def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = "", + api_key_str: str = ""): + """Wrap a plain OpenAI client in the correct transport adapter. + + Handles two cases: + - ``CodexAuxiliaryClient`` when the endpoint needs the Responses API + (explicit ``api_mode=codex_responses`` or api.openai.com + codex + model name). + - ``AnthropicAuxiliaryClient`` when the endpoint speaks Anthropic + Messages (explicit ``api_mode=anthropic_messages``, any ``/anthropic`` + suffix, ``api.kimi.com/coding``, or ``api.anthropic.com``). + + Clients that are already specialized wrappers pass through unchanged. + """ if _needs_codex_wrap(client_obj, base_url_str, final_model_str): logger.debug( "resolve_provider_client: wrapping client in CodexAuxiliaryClient " @@ -1784,7 +1916,11 @@ def resolve_provider_client( api_mode or "auto-detected", final_model_str, base_url_str[:60] if base_url_str else "") return CodexAuxiliaryClient(client_obj, final_model_str) - return client_obj + # Anthropic-wire endpoints: rewrap plain OpenAI clients so + # chat.completions.create() is translated to /v1/messages. 
+ return _maybe_wrap_anthropic( + client_obj, final_model_str, api_key_str, base_url_str, api_mode, + ) # ── Auto: try all providers in priority order ──────────────────── if provider == "auto": @@ -1892,7 +2028,7 @@ def resolve_provider_client( is_agent_turn=True, is_vision=is_vision ) client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra) - client = _wrap_if_needed(client, final_model, custom_base) + client = _wrap_if_needed(client, final_model, custom_base, custom_key) return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode else (client, final_model)) # Try custom first, then codex, then API-key providers @@ -1902,7 +2038,8 @@ def resolve_provider_client( if client is not None: final_model = _normalize_resolved_model(model or default, provider) _cbase = str(getattr(client, "base_url", "") or "") - client = _wrap_if_needed(client, final_model, _cbase) + _ckey = str(getattr(client, "api_key", "") or "") + client = _wrap_if_needed(client, final_model, _cbase, _ckey) return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode else (client, final_model)) logger.warning("resolve_provider_client: custom/main requested " @@ -1983,7 +2120,7 @@ def resolve_provider_client( ): client = CodexAuxiliaryClient(client, final_model) else: - client = _wrap_if_needed(client, final_model, openai_base) + client = _wrap_if_needed(client, final_model, openai_base, custom_key) return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode else (client, final_model)) logger.warning( @@ -2076,8 +2213,11 @@ def resolve_provider_client( # Honor api_mode for any API-key provider (e.g. direct OpenAI with # codex-family models). The copilot-specific wrapping above handles - # copilot; this covers the general case (#6800). - client = _wrap_if_needed(client, final_model, base_url) + # copilot; this covers the general case (#6800). 
Also rewraps + # Anthropic-wire endpoints (Kimi Coding Plan api.kimi.com/coding, + # /anthropic-suffixed gateways) so named providers like kimi-coding + # land on the right transport without needing per-provider branches. + client = _wrap_if_needed(client, final_model, base_url, api_key) logger.debug("resolve_provider_client: %s (%s)", provider, final_model) return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode diff --git a/tests/agent/test_auxiliary_transport_autodetect.py b/tests/agent/test_auxiliary_transport_autodetect.py new file mode 100644 index 000000000..eccb03de0 --- /dev/null +++ b/tests/agent/test_auxiliary_transport_autodetect.py @@ -0,0 +1,237 @@ +"""Tests for transport auto-detection in agent.auxiliary_client. + +Auxiliary clients must pick the correct wire protocol (OpenAI +chat.completions vs native Anthropic Messages) based on the endpoint, +regardless of which resolve_provider_client branch built them. + +Regression target (April 2026): Kimi Coding Plan's ``api.kimi.com/coding`` +endpoint only speaks Anthropic Messages — sending ``kimi-for-coding`` over +chat.completions returns 404 "resource_not_found_error". The named +``kimi-coding`` provider branch in resolve_provider_client used to build a +plain OpenAI client, so title generation / vision / compression / +web_extract all failed on Kimi Coding Plan users. 
from __future__ import annotations

import sys
from unittest.mock import MagicMock, patch

import pytest


@pytest.fixture(autouse=True)
def _clean_env(monkeypatch):
    """Remove provider credentials and base URLs from the environment.

    Keeps the host machine's configuration from steering provider
    resolution in any test of this module.
    """
    for key in (
        "OPENAI_API_KEY", "OPENAI_BASE_URL",
        "ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN",
        "KIMI_API_KEY", "KIMI_CODING_API_KEY", "KIMI_BASE_URL",
    ):
        monkeypatch.delenv(key, raising=False)


# ---------------------------------------------------------------------------
# URL detection helper
# ---------------------------------------------------------------------------

@pytest.mark.parametrize("url,expected,label", [
    ("https://api.kimi.com/coding/v1", True, "Kimi Coding Plan /v1"),
    ("https://api.kimi.com/coding", True, "Kimi Coding Plan no /v1"),
    ("https://api.moonshot.ai/v1", False, "Moonshot legacy"),
    ("https://api.minimax.io/anthropic", True, "MiniMax /anthropic"),
    ("https://litellm.example.com/v1/anthropic", True, "/anthropic suffix"),
    ("https://api.anthropic.com", True, "native Anthropic"),
    ("https://api.anthropic.com/v1", True, "native Anthropic /v1"),
    ("https://openrouter.ai/api/v1", False, "OpenRouter"),
    ("https://api.openai.com/v1", False, "OpenAI"),
    ("https://inference-api.nousresearch.com/v1", False, "Nous"),
    ("", False, "empty"),
    (None, False, "None"),
])
def test_endpoint_speaks_anthropic_messages(url, expected, label):
    """The URL heuristic classifies known endpoints and tolerates blanks."""
    from agent.auxiliary_client import _endpoint_speaks_anthropic_messages
    assert _endpoint_speaks_anthropic_messages(url) is expected, (
        f"{label}: {url!r} should be {expected}"
    )


# ---------------------------------------------------------------------------
# _maybe_wrap_anthropic decision table
# ---------------------------------------------------------------------------

def test_maybe_wrap_anthropic_rewraps_kimi_coding_url():
    """Plain OpenAI client pointed at api.kimi.com/coding gets rewrapped."""
    from agent.auxiliary_client import _maybe_wrap_anthropic, AnthropicAuxiliaryClient

    plain_client = MagicMock(name="plain_openai")
    fake_anthropic = MagicMock(name="anthropic_sdk_client")

    with patch(
        "agent.anthropic_adapter.build_anthropic_client",
        return_value=fake_anthropic,
    ):
        result = _maybe_wrap_anthropic(
            plain_client, "kimi-for-coding", "sk-kimi-test",
            "https://api.kimi.com/coding", api_mode=None,
        )
    assert isinstance(result, AnthropicAuxiliaryClient)


def test_maybe_wrap_anthropic_rewraps_slash_anthropic_url():
    """Plain OpenAI client pointed at any /anthropic URL gets rewrapped."""
    from agent.auxiliary_client import _maybe_wrap_anthropic, AnthropicAuxiliaryClient

    plain_client = MagicMock(name="plain_openai")
    fake_anthropic = MagicMock(name="anthropic_sdk_client")

    with patch(
        "agent.anthropic_adapter.build_anthropic_client",
        return_value=fake_anthropic,
    ):
        result = _maybe_wrap_anthropic(
            plain_client, "MiniMax-M2.7", "mm-key",
            "https://api.minimax.io/anthropic", api_mode=None,
        )
    assert isinstance(result, AnthropicAuxiliaryClient)


def test_maybe_wrap_anthropic_skips_openai_wire_urls():
    """An OpenAI-wire URL (OpenRouter here) stays a plain OpenAI client."""
    from agent.auxiliary_client import _maybe_wrap_anthropic, AnthropicAuxiliaryClient

    plain_client = MagicMock(name="plain_openai")
    # Patch the builder so "never attempted to build an Anthropic client"
    # is an enforced assertion rather than an unstated expectation.
    with patch("agent.anthropic_adapter.build_anthropic_client") as builder:
        result = _maybe_wrap_anthropic(
            plain_client, "claude-sonnet-4.6", "sk-or-test",
            "https://openrouter.ai/api/v1", api_mode=None,
        )
    builder.assert_not_called()
    assert result is plain_client
    assert not isinstance(result, AnthropicAuxiliaryClient)


def test_maybe_wrap_anthropic_respects_explicit_chat_completions():
    """api_mode=chat_completions overrides URL heuristics."""
    from agent.auxiliary_client import _maybe_wrap_anthropic, AnthropicAuxiliaryClient

    plain_client = MagicMock(name="plain_openai")
    with patch("agent.anthropic_adapter.build_anthropic_client") as builder:
        result = _maybe_wrap_anthropic(
            plain_client, "kimi-for-coding", "sk-kimi-test",
            "https://api.kimi.com/coding",
            api_mode="chat_completions",  # explicit override
        )
    builder.assert_not_called()
    assert result is plain_client, "Explicit chat_completions must bypass wrap"
    assert not isinstance(result, AnthropicAuxiliaryClient)


def test_maybe_wrap_anthropic_honors_explicit_anthropic_messages():
    """api_mode=anthropic_messages wraps even when URL wouldn't trigger."""
    from agent.auxiliary_client import _maybe_wrap_anthropic, AnthropicAuxiliaryClient

    plain_client = MagicMock(name="plain_openai")
    fake_anthropic = MagicMock(name="anthropic_sdk_client")

    with patch(
        "agent.anthropic_adapter.build_anthropic_client",
        return_value=fake_anthropic,
    ):
        result = _maybe_wrap_anthropic(
            plain_client, "model-name", "some-key",
            "https://opaque.internal/v1",  # URL alone wouldn't trigger
            api_mode="anthropic_messages",
        )
    assert isinstance(result, AnthropicAuxiliaryClient)


def test_maybe_wrap_anthropic_double_wrap_safe():
    """Already-wrapped AnthropicAuxiliaryClient passes through unchanged."""
    from agent.auxiliary_client import _maybe_wrap_anthropic, AnthropicAuxiliaryClient

    already_wrapped = MagicMock(spec=AnthropicAuxiliaryClient)
    result = _maybe_wrap_anthropic(
        already_wrapped, "model", "key",
        "https://api.kimi.com/coding", api_mode=None,
    )
    assert result is already_wrapped


def test_maybe_wrap_anthropic_codex_client_passes_through():
    """CodexAuxiliaryClient is never re-dispatched."""
    from agent.auxiliary_client import (
        _maybe_wrap_anthropic,
        CodexAuxiliaryClient,
        AnthropicAuxiliaryClient,
    )

    codex_client = MagicMock(spec=CodexAuxiliaryClient)
    result = _maybe_wrap_anthropic(
        codex_client, "model", "key",
        "https://api.kimi.com/coding", api_mode=None,
    )
    assert result is codex_client
    assert not isinstance(result, AnthropicAuxiliaryClient)


def test_maybe_wrap_anthropic_sdk_missing_falls_back(monkeypatch):
    """An unimportable Anthropic adapter returns the plain client unchanged."""
    from agent.auxiliary_client import _maybe_wrap_anthropic, AnthropicAuxiliaryClient

    plain_client = MagicMock(name="plain_openai")

    # A ``None`` entry in sys.modules makes the lazy
    # ``from agent.anthropic_adapter import ...`` inside
    # _maybe_wrap_anthropic raise ImportError.  monkeypatch restores (or
    # removes) the entry on teardown, even if the call below raises.
    monkeypatch.setitem(sys.modules, "agent.anthropic_adapter", None)

    result = _maybe_wrap_anthropic(
        plain_client, "kimi-for-coding", "sk-kimi-test",
        "https://api.kimi.com/coding", api_mode=None,
    )

    assert result is plain_client
    assert not isinstance(result, AnthropicAuxiliaryClient)


# ---------------------------------------------------------------------------
# Integration: resolve_provider_client for named kimi-coding provider
# ---------------------------------------------------------------------------

def test_resolve_provider_client_kimi_coding_wraps_anthropic(monkeypatch, tmp_path):
    """End-to-end: resolve_provider_client('kimi-coding', 'kimi-for-coding')
    must return AnthropicAuxiliaryClient because /coding speaks Anthropic.

    This is the primary regression guard: the bug that caused title
    generation 404s on every Kimi Coding Plan user after the "main model
    for every user" aux design shipped.
    """
    from agent.auxiliary_client import (
        resolve_provider_client,
        AnthropicAuxiliaryClient,
    )

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    # sk-kimi- prefix triggers /coding endpoint auto-detection
    monkeypatch.setenv("KIMI_API_KEY", "sk-kimi-faketesttoken123")

    client, _ = resolve_provider_client("kimi-coding", "kimi-for-coding")
    assert client is not None, "Should resolve a client"
    assert isinstance(client, AnthropicAuxiliaryClient), (
        "Kimi Coding Plan endpoint (api.kimi.com/coding) speaks Anthropic "
        "Messages — aux client MUST be AnthropicAuxiliaryClient, got "
        f"{type(client).__name__}"
    )
    assert "kimi.com/coding" in str(client.base_url)