From f4891db858c759c697ff608237f00e27b73030bc Mon Sep 17 00:00:00 2001 From: HuchA1n Date: Sat, 25 Apr 2026 18:28:08 +0800 Subject: [PATCH 1/4] fix: health_check embedding test and add embedding extra params support - health_check: refactor test_embedding and test_chat to use LiteLLMAPIBackend - llm_conf: add embedding_extra_params config for provider-specific params - litellm: pass embedding_openai_api_key/base_url and extra_params to embedding() Co-Authored-By: Claude Opus 4.7 --- rdagent/app/utils/health_check.py | 36 ++++++++----------------------- rdagent/oai/backend/litellm.py | 14 ++++++++---- rdagent/oai/llm_conf.py | 1 + 3 files changed, 20 insertions(+), 31 deletions(-) diff --git a/rdagent/app/utils/health_check.py b/rdagent/app/utils/health_check.py index 95597b257..a49501198 100644 --- a/rdagent/app/utils/health_check.py +++ b/rdagent/app/utils/health_check.py @@ -3,9 +3,6 @@ import docker import fire -import litellm -from litellm import completion, embedding -from litellm.utils import ModelResponse from rdagent.log import rdagent_logger as logger from rdagent.utils.env import cleanup_container @@ -51,24 +48,11 @@ def check_and_list_free_ports(start_port=19899, max_ports=10) -> None: def test_chat(chat_model, chat_api_key, chat_api_base): logger.info(f"๐Ÿงช Testing chat model: {chat_model}") try: - if chat_api_base is None: - response: ModelResponse = completion( - model=chat_model, - api_key=chat_api_key, - messages=[ - {"role": "user", "content": "Hello!"}, - ], - ) - else: - response: ModelResponse = completion( - model=chat_model, - api_key=chat_api_key, - api_base=chat_api_base, - messages=[ - {"role": "user", "content": "Hello!"}, - ], - ) - logger.info(f"โœ… Chat test passed.") + from rdagent.oai.backend.litellm import LiteLLMAPIBackend + + backend = LiteLLMAPIBackend() + backend.build_messages_and_create_chat_completion(user_prompt="Hello!") + logger.info("โœ… Chat test passed.") return True except Exception as e: logger.error(f"โŒ Chat test failed: {e}") @@ -78,12 +62,10 @@ def test_chat(chat_model, chat_api_key, chat_api_base): def test_embedding(embedding_model, embedding_api_key, embedding_api_base): logger.info(f"๐Ÿงช Testing embedding model: {embedding_model}") try: - response = embedding( - model=embedding_model, - api_key=embedding_api_key, - api_base=embedding_api_base, - input="Hello world!", - ) + from rdagent.oai.backend.litellm import LiteLLMAPIBackend + + backend = LiteLLMAPIBackend() + backend.create_embedding(input_content="Hello world!") logger.info("โœ… Embedding test passed.") return True except Exception as e: diff --git a/rdagent/oai/backend/litellm.py b/rdagent/oai/backend/litellm.py index 86cbe8461..d7177213b 100644 --- a/rdagent/oai/backend/litellm.py +++ b/rdagent/oai/backend/litellm.py @@ -79,10 +79,16 @@ def _create_embedding_inner_function(self, input_content_list: list[str]) -> lis f"{LogColors.MAGENTA}Creating embedding{LogColors.END} for: {input_content_list}", tag="debug_litellm_emb", ) - response = embedding( - model=model_name, - input=input_content_list, - ) + call_kwargs = { + "model": model_name, + "input": input_content_list, + **LITELLM_SETTINGS.embedding_extra_params, + } + if LITELLM_SETTINGS.embedding_openai_api_key: + call_kwargs["api_key"] = LITELLM_SETTINGS.embedding_openai_api_key + if LITELLM_SETTINGS.embedding_openai_base_url: + call_kwargs["api_base"] = LITELLM_SETTINGS.embedding_openai_base_url + response = embedding(**call_kwargs) response_list = [data["embedding"] for data in response.data] return response_list diff --git a/rdagent/oai/llm_conf.py b/rdagent/oai/llm_conf.py index a9a1130e7..69148fd71 100644 --- a/rdagent/oai/llm_conf.py +++ b/rdagent/oai/llm_conf.py @@ -87,6 +87,7 @@ class LLMSettings(ExtendedBaseSettings): embedding_azure_api_version: str = "" embedding_max_str_num: int = 50 embedding_max_length: int = 8192 + embedding_extra_params: dict = {} # offline llama2 related config use_llama2: bool = False From 7bd3ee7ddc6b238a3502084ca7027508c37da954 Mon Sep 17 00:00:00 2001 From: HuchA1n Date: Sat, 25 Apr 2026 20:19:11 +0800 Subject: [PATCH 2/4] fix: support LITELLM_PROXY_API_KEY/BASE for embedding in LiteLLM backend - llm_conf: add proxy_api_key and proxy_api_base fields for embedding config - litellm: use proxy_api_key/proxy_api_base from LITELLM_SETTINGS when calling embedding - health_check: refactor test_embedding and test_chat to use production code path This allows production code to use LITELLM_PROXY_API_KEY and LITELLM_PROXY_API_BASE environment variables, which were previously only used by health_check tests. Co-Authored-By: Claude Opus 4.7 --- rdagent/oai/backend/litellm.py | 9 +++++---- rdagent/oai/llm_conf.py | 3 +++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/rdagent/oai/backend/litellm.py b/rdagent/oai/backend/litellm.py index d7177213b..9ebbd08c2 100644 --- a/rdagent/oai/backend/litellm.py +++ b/rdagent/oai/backend/litellm.py @@ -84,10 +84,11 @@ def _create_embedding_inner_function(self, input_content_list: list[str]) -> lis "input": input_content_list, **LITELLM_SETTINGS.embedding_extra_params, } - if LITELLM_SETTINGS.embedding_openai_api_key: - call_kwargs["api_key"] = LITELLM_SETTINGS.embedding_openai_api_key - if LITELLM_SETTINGS.embedding_openai_base_url: - call_kwargs["api_base"] = LITELLM_SETTINGS.embedding_openai_base_url + # Use proxy config from LITELLM_SETTINGS (reads LITELLM_PROXY_API_KEY and LITELLM_PROXY_API_BASE) + if LITELLM_SETTINGS.proxy_api_key: + call_kwargs["api_key"] = LITELLM_SETTINGS.proxy_api_key + if LITELLM_SETTINGS.proxy_api_base: + call_kwargs["api_base"] = LITELLM_SETTINGS.proxy_api_base response = embedding(**call_kwargs) response_list = [data["embedding"] for data in response.data] return response_list diff --git a/rdagent/oai/llm_conf.py b/rdagent/oai/llm_conf.py index 69148fd71..1bacf076e 100644 --- a/rdagent/oai/llm_conf.py +++ b/rdagent/oai/llm_conf.py @@ -88,6 +88,9 @@ class LLMSettings(ExtendedBaseSettings): embedding_max_str_num: int = 50 embedding_max_length: int = 8192 embedding_extra_params: dict = {} + # LiteLLM proxy config (for embedding) + proxy_api_key: str = "" + proxy_api_base: str = "" # offline llama2 related config use_llama2: bool = False From 4bba19946c5ee85785299893c5866397c1f08a71 Mon Sep 17 00:00:00 2001 From: HuchA1n Date: Sat, 25 Apr 2026 22:33:25 +0800 Subject: [PATCH 3/4] refactor: rename LITELLM_PROXY to EMBEDDING/CHAT_OPENAI_COMPATIBLE env vars - Replace LITELLM_PROXY_API_KEY/BASE with EMBEDDING_OPENAI_COMPATIBLE_API_KEY/BASE - Add CHAT_OPENAI_COMPATIBLE_API_KEY/BASE for chat completion - Remove deprecated proxy_api_key/proxy_api_base fields - Update .env to use new variable names Co-Authored-By: Claude Opus 4.7 --- rdagent/oai/backend/litellm.py | 18 +++++++++++++----- rdagent/oai/llm_conf.py | 9 ++++++--- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/rdagent/oai/backend/litellm.py b/rdagent/oai/backend/litellm.py index 9ebbd08c2..d2410d190 100644 --- a/rdagent/oai/backend/litellm.py +++ b/rdagent/oai/backend/litellm.py @@ -84,11 +84,11 @@ def _create_embedding_inner_function(self, input_content_list: list[str]) -> lis "input": input_content_list, **LITELLM_SETTINGS.embedding_extra_params, } - # Use proxy config from LITELLM_SETTINGS (reads LITELLM_PROXY_API_KEY and LITELLM_PROXY_API_BASE) - if LITELLM_SETTINGS.proxy_api_key: - call_kwargs["api_key"] = LITELLM_SETTINGS.proxy_api_key - if LITELLM_SETTINGS.proxy_api_base: - call_kwargs["api_base"] = LITELLM_SETTINGS.proxy_api_base + # Use embedding OpenAI-Compatible config + if LITELLM_SETTINGS.embedding_openai_compatible_api_key: + call_kwargs["api_key"] = LITELLM_SETTINGS.embedding_openai_compatible_api_key + if LITELLM_SETTINGS.embedding_openai_compatible_api_base: + call_kwargs["api_base"] = LITELLM_SETTINGS.embedding_openai_compatible_api_base response = embedding(**call_kwargs) response_list = [data["embedding"] for data in response.data] return response_list @@ -159,11 +159,19 @@ def _create_chat_completion_inner_function( # type: ignore[no-untyped-def] # no complete_kwargs = self.get_complete_kwargs() model = complete_kwargs["model"] + # Use OpenAI-Compatible API config for chat completion + call_kwargs: dict[str, Any] = {} + if LITELLM_SETTINGS.chat_openai_compatible_api_key: + call_kwargs["api_key"] = LITELLM_SETTINGS.chat_openai_compatible_api_key + if LITELLM_SETTINGS.chat_openai_compatible_api_base: + call_kwargs["api_base"] = LITELLM_SETTINGS.chat_openai_compatible_api_base + response = completion( messages=messages, stream=LITELLM_SETTINGS.chat_stream, max_retries=0, **complete_kwargs, + **call_kwargs, **kwargs, ) if LITELLM_SETTINGS.log_llm_chat_content: diff --git a/rdagent/oai/llm_conf.py b/rdagent/oai/llm_conf.py index 1bacf076e..708b494b0 100644 --- a/rdagent/oai/llm_conf.py +++ b/rdagent/oai/llm_conf.py @@ -66,6 +66,9 @@ class LLMSettings(ExtendedBaseSettings): chat_openai_base_url: str | None = None # chat_azure_api_base: str = "" chat_azure_api_version: str = "" + # OpenAI-Compatible API config (for chat completion) + chat_openai_compatible_api_key: str = "" + chat_openai_compatible_api_base: str = "" chat_max_tokens: int | None = None chat_temperature: float = 0.5 chat_stream: bool = True @@ -88,9 +91,9 @@ class LLMSettings(ExtendedBaseSettings): embedding_max_str_num: int = 50 embedding_max_length: int = 8192 embedding_extra_params: dict = {} - # LiteLLM proxy config (for embedding) - proxy_api_key: str = "" - proxy_api_base: str = "" + # OpenAI-Compatible API config (for embedding) + embedding_openai_compatible_api_key: str = "" + embedding_openai_compatible_api_base: str = "" # offline llama2 related config use_llama2: bool = False From e8c512209a9a21c1cfa003ffa6d1a6c51385d2f3 Mon Sep 17 00:00:00 2001 From: huchian Date: Sun, 26 Apr 2026 09:34:51 +0800 Subject: [PATCH 4/4] fix: enhance health check for OpenAI-compatible API keys and improve embedding extra params handling --- rdagent/app/utils/health_check.py | 7 +++++++ rdagent/oai/backend/litellm.py | 15 ++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/rdagent/app/utils/health_check.py b/rdagent/app/utils/health_check.py index a49501198..467d2d23d 100644 --- a/rdagent/app/utils/health_check.py +++ b/rdagent/app/utils/health_check.py @@ -99,6 +99,13 @@ def env_check(): embedding_model = os.getenv("EMBEDDING_MODEL") embedding_api_key = chat_api_key embedding_api_base = chat_api_base + elif "CHAT_OPENAI_COMPATIBLE_API_KEY" in os.environ or "EMBEDDING_OPENAI_COMPATIBLE_API_KEY" in os.environ: + chat_api_key = os.getenv("CHAT_OPENAI_COMPATIBLE_API_KEY") + chat_api_base = os.getenv("CHAT_OPENAI_COMPATIBLE_API_BASE") + chat_model = os.getenv("CHAT_MODEL") + embedding_model = os.getenv("EMBEDDING_MODEL") + embedding_api_key = os.getenv("EMBEDDING_OPENAI_COMPATIBLE_API_KEY") + embedding_api_base = os.getenv("EMBEDDING_OPENAI_COMPATIBLE_API_BASE") else: logger.error("No valid configuration was found, please check your .env file.") diff --git a/rdagent/oai/backend/litellm.py b/rdagent/oai/backend/litellm.py index d2410d190..9dd489000 100644 --- a/rdagent/oai/backend/litellm.py +++ b/rdagent/oai/backend/litellm.py @@ -82,8 +82,15 @@ def _create_embedding_inner_function(self, input_content_list: list[str]) -> lis call_kwargs = { "model": model_name, "input": input_content_list, - **LITELLM_SETTINGS.embedding_extra_params, } + if LITELLM_SETTINGS.embedding_extra_params: + if isinstance(LITELLM_SETTINGS.embedding_extra_params, dict): + call_kwargs.update(LITELLM_SETTINGS.embedding_extra_params) + else: + logger.error( + f"{LogColors.RED}embedding_extra_params must be a dict, got {type(LITELLM_SETTINGS.embedding_extra_params).__name__}. Ignoring extra params.{LogColors.END}", + tag="debug_litellm_emb", + ) # Use embedding OpenAI-Compatible config if LITELLM_SETTINGS.embedding_openai_compatible_api_key: call_kwargs["api_key"] = LITELLM_SETTINGS.embedding_openai_compatible_api_key @@ -160,18 +167,16 @@ def _create_chat_completion_inner_function( # type: ignore[no-untyped-def] # no model = complete_kwargs["model"] # Use OpenAI-Compatible API config for chat completion - call_kwargs: dict[str, Any] = {} if LITELLM_SETTINGS.chat_openai_compatible_api_key: - call_kwargs["api_key"] = LITELLM_SETTINGS.chat_openai_compatible_api_key + complete_kwargs["api_key"] = LITELLM_SETTINGS.chat_openai_compatible_api_key if LITELLM_SETTINGS.chat_openai_compatible_api_base: - call_kwargs["api_base"] = LITELLM_SETTINGS.chat_openai_compatible_api_base + complete_kwargs["api_base"] = LITELLM_SETTINGS.chat_openai_compatible_api_base response = completion( messages=messages, stream=LITELLM_SETTINGS.chat_stream, max_retries=0, **complete_kwargs, - **call_kwargs, **kwargs, ) if LITELLM_SETTINGS.log_llm_chat_content: