From f4891db858c759c697ff608237f00e27b73030bc Mon Sep 17 00:00:00 2001
From: HuchA1n <huchian@foxmail.com>
Date: Sat, 25 Apr 2026 18:28:08 +0800
Subject: [PATCH 1/4] fix: health_check embedding test and add embedding extra
 params support

- health_check: refactor test_embedding and test_chat to use LiteLLMAPIBackend
- llm_conf: add embedding_extra_params config for provider-specific params
- litellm: pass embedding_openai_api_key/base_url and extra_params to embedding()

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 rdagent/app/utils/health_check.py | 36 ++++++++-----------------------
 rdagent/oai/backend/litellm.py    | 14 ++++++++----
 rdagent/oai/llm_conf.py           |  1 +
 3 files changed, 20 insertions(+), 31 deletions(-)

diff --git a/rdagent/app/utils/health_check.py b/rdagent/app/utils/health_check.py
index 95597b257..a49501198 100644
--- a/rdagent/app/utils/health_check.py
+++ b/rdagent/app/utils/health_check.py
@@ -3,9 +3,6 @@
 
 import docker
 import fire
-import litellm
-from litellm import completion, embedding
-from litellm.utils import ModelResponse
 
 from rdagent.log import rdagent_logger as logger
 from rdagent.utils.env import cleanup_container
@@ -51,24 +48,11 @@ def check_and_list_free_ports(start_port=19899, max_ports=10) -> None:
 def test_chat(chat_model, chat_api_key, chat_api_base):
     logger.info(f"🧪 Testing chat model: {chat_model}")
     try:
-        if chat_api_base is None:
-            response: ModelResponse = completion(
-                model=chat_model,
-                api_key=chat_api_key,
-                messages=[
-                    {"role": "user", "content": "Hello!"},
-                ],
-            )
-        else:
-            response: ModelResponse = completion(
-                model=chat_model,
-                api_key=chat_api_key,
-                api_base=chat_api_base,
-                messages=[
-                    {"role": "user", "content": "Hello!"},
-                ],
-            )
-        logger.info(f"✅ Chat test passed.")
+        from rdagent.oai.backend.litellm import LiteLLMAPIBackend
+
+        backend = LiteLLMAPIBackend()
+        backend.build_messages_and_create_chat_completion(user_prompt="Hello!")
+        logger.info("✅ Chat test passed.")
         return True
     except Exception as e:
         logger.error(f"❌ Chat test failed: {e}")
@@ -78,12 +62,10 @@ def test_chat(chat_model, chat_api_key, chat_api_base):
 def test_embedding(embedding_model, embedding_api_key, embedding_api_base):
     logger.info(f"🧪 Testing embedding model: {embedding_model}")
     try:
-        response = embedding(
-            model=embedding_model,
-            api_key=embedding_api_key,
-            api_base=embedding_api_base,
-            input="Hello world!",
-        )
+        from rdagent.oai.backend.litellm import LiteLLMAPIBackend
+
+        backend = LiteLLMAPIBackend()
+        backend.create_embedding(input_content="Hello world!")
         logger.info("✅ Embedding test passed.")
         return True
     except Exception as e:
diff --git a/rdagent/oai/backend/litellm.py b/rdagent/oai/backend/litellm.py
index 86cbe8461..d7177213b 100644
--- a/rdagent/oai/backend/litellm.py
+++ b/rdagent/oai/backend/litellm.py
@@ -79,10 +79,16 @@ def _create_embedding_inner_function(self, input_content_list: list[str]) -> lis
                 f"{LogColors.MAGENTA}Creating embedding{LogColors.END} for: {input_content_list}",
                 tag="debug_litellm_emb",
             )
-        response = embedding(
-            model=model_name,
-            input=input_content_list,
-        )
+        call_kwargs = {
+            "model": model_name,
+            "input": input_content_list,
+            **LITELLM_SETTINGS.embedding_extra_params,
+        }
+        if LITELLM_SETTINGS.embedding_openai_api_key:
+            call_kwargs["api_key"] = LITELLM_SETTINGS.embedding_openai_api_key
+        if LITELLM_SETTINGS.embedding_openai_base_url:
+            call_kwargs["api_base"] = LITELLM_SETTINGS.embedding_openai_base_url
+        response = embedding(**call_kwargs)
         response_list = [data["embedding"] for data in response.data]
         return response_list
 
diff --git a/rdagent/oai/llm_conf.py b/rdagent/oai/llm_conf.py
index a9a1130e7..69148fd71 100644
--- a/rdagent/oai/llm_conf.py
+++ b/rdagent/oai/llm_conf.py
@@ -87,6 +87,7 @@ class LLMSettings(ExtendedBaseSettings):
     embedding_azure_api_version: str = ""
     embedding_max_str_num: int = 50
     embedding_max_length: int = 8192
+    embedding_extra_params: dict = {}
 
     # offline llama2 related config
     use_llama2: bool = False

From 7bd3ee7ddc6b238a3502084ca7027508c37da954 Mon Sep 17 00:00:00 2001
From: HuchA1n <huchian@foxmail.com>
Date: Sat, 25 Apr 2026 20:19:11 +0800
Subject: [PATCH 2/4] fix: support LITELLM_PROXY_API_KEY/BASE for embedding in
 LiteLLM backend

- llm_conf: add proxy_api_key and proxy_api_base fields for embedding config
- litellm: use proxy_api_key/proxy_api_base from LITELLM_SETTINGS when calling embedding
- health_check: no changes in this commit (test_embedding and test_chat were already switched to the production code path in PATCH 1/4)

This allows production code to use LITELLM_PROXY_API_KEY and LITELLM_PROXY_API_BASE
environment variables, which were previously only used by health_check tests.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 rdagent/oai/backend/litellm.py | 9 +++++----
 rdagent/oai/llm_conf.py        | 3 +++
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/rdagent/oai/backend/litellm.py b/rdagent/oai/backend/litellm.py
index d7177213b..9ebbd08c2 100644
--- a/rdagent/oai/backend/litellm.py
+++ b/rdagent/oai/backend/litellm.py
@@ -84,10 +84,11 @@ def _create_embedding_inner_function(self, input_content_list: list[str]) -> lis
             "input": input_content_list,
             **LITELLM_SETTINGS.embedding_extra_params,
         }
-        if LITELLM_SETTINGS.embedding_openai_api_key:
-            call_kwargs["api_key"] = LITELLM_SETTINGS.embedding_openai_api_key
-        if LITELLM_SETTINGS.embedding_openai_base_url:
-            call_kwargs["api_base"] = LITELLM_SETTINGS.embedding_openai_base_url
+        # Use proxy config from LITELLM_SETTINGS (reads LITELLM_PROXY_API_KEY and LITELLM_PROXY_API_BASE)
+        if LITELLM_SETTINGS.proxy_api_key:
+            call_kwargs["api_key"] = LITELLM_SETTINGS.proxy_api_key
+        if LITELLM_SETTINGS.proxy_api_base:
+            call_kwargs["api_base"] = LITELLM_SETTINGS.proxy_api_base
         response = embedding(**call_kwargs)
         response_list = [data["embedding"] for data in response.data]
         return response_list
diff --git a/rdagent/oai/llm_conf.py b/rdagent/oai/llm_conf.py
index 69148fd71..1bacf076e 100644
--- a/rdagent/oai/llm_conf.py
+++ b/rdagent/oai/llm_conf.py
@@ -88,6 +88,9 @@ class LLMSettings(ExtendedBaseSettings):
     embedding_max_str_num: int = 50
     embedding_max_length: int = 8192
     embedding_extra_params: dict = {}
+    # LiteLLM proxy config (for embedding)
+    proxy_api_key: str = ""
+    proxy_api_base: str = ""
 
     # offline llama2 related config
     use_llama2: bool = False

From 4bba19946c5ee85785299893c5866397c1f08a71 Mon Sep 17 00:00:00 2001
From: HuchA1n <huchian@foxmail.com>
Date: Sat, 25 Apr 2026 22:33:25 +0800
Subject: [PATCH 3/4] refactor: rename LITELLM_PROXY to
 EMBEDDING/CHAT_OPENAI_COMPATIBLE env vars

- Replace LITELLM_PROXY_API_KEY/BASE with EMBEDDING_OPENAI_COMPATIBLE_API_KEY/BASE
- Add CHAT_OPENAI_COMPATIBLE_API_KEY/BASE for chat completion
- Remove deprecated proxy_api_key/proxy_api_base fields
- Update .env to use new variable names (the .env file itself is not part of this diff)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 rdagent/oai/backend/litellm.py | 18 +++++++++++++-----
 rdagent/oai/llm_conf.py        |  9 ++++++---
 2 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/rdagent/oai/backend/litellm.py b/rdagent/oai/backend/litellm.py
index 9ebbd08c2..d2410d190 100644
--- a/rdagent/oai/backend/litellm.py
+++ b/rdagent/oai/backend/litellm.py
@@ -84,11 +84,11 @@ def _create_embedding_inner_function(self, input_content_list: list[str]) -> lis
             "input": input_content_list,
             **LITELLM_SETTINGS.embedding_extra_params,
         }
-        # Use proxy config from LITELLM_SETTINGS (reads LITELLM_PROXY_API_KEY and LITELLM_PROXY_API_BASE)
-        if LITELLM_SETTINGS.proxy_api_key:
-            call_kwargs["api_key"] = LITELLM_SETTINGS.proxy_api_key
-        if LITELLM_SETTINGS.proxy_api_base:
-            call_kwargs["api_base"] = LITELLM_SETTINGS.proxy_api_base
+        # Use embedding OpenAI-Compatible config
+        if LITELLM_SETTINGS.embedding_openai_compatible_api_key:
+            call_kwargs["api_key"] = LITELLM_SETTINGS.embedding_openai_compatible_api_key
+        if LITELLM_SETTINGS.embedding_openai_compatible_api_base:
+            call_kwargs["api_base"] = LITELLM_SETTINGS.embedding_openai_compatible_api_base
         response = embedding(**call_kwargs)
         response_list = [data["embedding"] for data in response.data]
         return response_list
@@ -159,11 +159,19 @@ def _create_chat_completion_inner_function(  # type: ignore[no-untyped-def] # no
         complete_kwargs = self.get_complete_kwargs()
         model = complete_kwargs["model"]
 
+        # Use OpenAI-Compatible API config for chat completion
+        call_kwargs: dict[str, Any] = {}
+        if LITELLM_SETTINGS.chat_openai_compatible_api_key:
+            call_kwargs["api_key"] = LITELLM_SETTINGS.chat_openai_compatible_api_key
+        if LITELLM_SETTINGS.chat_openai_compatible_api_base:
+            call_kwargs["api_base"] = LITELLM_SETTINGS.chat_openai_compatible_api_base
+
         response = completion(
             messages=messages,
             stream=LITELLM_SETTINGS.chat_stream,
             max_retries=0,
             **complete_kwargs,
+            **call_kwargs,
             **kwargs,
         )
         if LITELLM_SETTINGS.log_llm_chat_content:
diff --git a/rdagent/oai/llm_conf.py b/rdagent/oai/llm_conf.py
index 1bacf076e..708b494b0 100644
--- a/rdagent/oai/llm_conf.py
+++ b/rdagent/oai/llm_conf.py
@@ -66,6 +66,9 @@ class LLMSettings(ExtendedBaseSettings):
     chat_openai_base_url: str | None = None  #
     chat_azure_api_base: str = ""
     chat_azure_api_version: str = ""
+    # OpenAI-Compatible API config (for chat completion)
+    chat_openai_compatible_api_key: str = ""
+    chat_openai_compatible_api_base: str = ""
     chat_max_tokens: int | None = None
     chat_temperature: float = 0.5
     chat_stream: bool = True
@@ -88,9 +91,9 @@ class LLMSettings(ExtendedBaseSettings):
     embedding_max_str_num: int = 50
     embedding_max_length: int = 8192
     embedding_extra_params: dict = {}
-    # LiteLLM proxy config (for embedding)
-    proxy_api_key: str = ""
-    proxy_api_base: str = ""
+    # OpenAI-Compatible API config (for embedding)
+    embedding_openai_compatible_api_key: str = ""
+    embedding_openai_compatible_api_base: str = ""
 
     # offline llama2 related config
     use_llama2: bool = False

From e8c512209a9a21c1cfa003ffa6d1a6c51385d2f3 Mon Sep 17 00:00:00 2001
From: huchian <huchian@foxmail.com>
Date: Sun, 26 Apr 2026 09:34:51 +0800
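Subject: [PATCH 4/4] fix: enhance health check for OpenAI-compatible API keys
 and improve embedding extra params handling

- health_check: env_check now recognizes CHAT_OPENAI_COMPATIBLE_API_KEY/BASE and EMBEDDING_OPENAI_COMPATIBLE_API_KEY/BASE
- litellm: merge embedding_extra_params into the embedding() call only when it is a dict; otherwise log an error and ignore it
- litellm: set the chat api_key/api_base directly on complete_kwargs instead of passing a separate call_kwargs dict to completion()
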
---
 rdagent/app/utils/health_check.py |  7 +++++++
 rdagent/oai/backend/litellm.py    | 15 ++++++++++-----
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/rdagent/app/utils/health_check.py b/rdagent/app/utils/health_check.py
index a49501198..467d2d23d 100644
--- a/rdagent/app/utils/health_check.py
+++ b/rdagent/app/utils/health_check.py
@@ -99,6 +99,13 @@ def env_check():
         embedding_model = os.getenv("EMBEDDING_MODEL")
         embedding_api_key = chat_api_key
         embedding_api_base = chat_api_base
+    elif "CHAT_OPENAI_COMPATIBLE_API_KEY" in os.environ or "EMBEDDING_OPENAI_COMPATIBLE_API_KEY" in os.environ:
+        chat_api_key = os.getenv("CHAT_OPENAI_COMPATIBLE_API_KEY")
+        chat_api_base = os.getenv("CHAT_OPENAI_COMPATIBLE_API_BASE")
+        chat_model = os.getenv("CHAT_MODEL")
+        embedding_model = os.getenv("EMBEDDING_MODEL")
+        embedding_api_key = os.getenv("EMBEDDING_OPENAI_COMPATIBLE_API_KEY")
+        embedding_api_base = os.getenv("EMBEDDING_OPENAI_COMPATIBLE_API_BASE")
     else:
         logger.error("No valid configuration was found, please check your .env file.")
 
diff --git a/rdagent/oai/backend/litellm.py b/rdagent/oai/backend/litellm.py
index d2410d190..9dd489000 100644
--- a/rdagent/oai/backend/litellm.py
+++ b/rdagent/oai/backend/litellm.py
@@ -82,8 +82,15 @@ def _create_embedding_inner_function(self, input_content_list: list[str]) -> lis
         call_kwargs = {
             "model": model_name,
             "input": input_content_list,
-            **LITELLM_SETTINGS.embedding_extra_params,
         }
+        if LITELLM_SETTINGS.embedding_extra_params:
+            if isinstance(LITELLM_SETTINGS.embedding_extra_params, dict):
+                call_kwargs.update(LITELLM_SETTINGS.embedding_extra_params)
+            else:
+                logger.error(
+                    f"{LogColors.RED}embedding_extra_params must be a dict, got {type(LITELLM_SETTINGS.embedding_extra_params).__name__}. Ignoring extra params.{LogColors.END}",
+                    tag="debug_litellm_emb",
+                )
         # Use embedding OpenAI-Compatible config
         if LITELLM_SETTINGS.embedding_openai_compatible_api_key:
             call_kwargs["api_key"] = LITELLM_SETTINGS.embedding_openai_compatible_api_key
@@ -160,18 +167,16 @@ def _create_chat_completion_inner_function(  # type: ignore[no-untyped-def] # no
         model = complete_kwargs["model"]
 
         # Use OpenAI-Compatible API config for chat completion
-        call_kwargs: dict[str, Any] = {}
         if LITELLM_SETTINGS.chat_openai_compatible_api_key:
-            call_kwargs["api_key"] = LITELLM_SETTINGS.chat_openai_compatible_api_key
+            complete_kwargs["api_key"] = LITELLM_SETTINGS.chat_openai_compatible_api_key
         if LITELLM_SETTINGS.chat_openai_compatible_api_base:
-            call_kwargs["api_base"] = LITELLM_SETTINGS.chat_openai_compatible_api_base
+            complete_kwargs["api_base"] = LITELLM_SETTINGS.chat_openai_compatible_api_base
 
         response = completion(
             messages=messages,
             stream=LITELLM_SETTINGS.chat_stream,
             max_retries=0,
             **complete_kwargs,
-            **call_kwargs,
             **kwargs,
         )
         if LITELLM_SETTINGS.log_llm_chat_content: