diff --git a/docs/input.md b/docs/input.md index 0271e216af..71caaf4b5e 100644 --- a/docs/input.md +++ b/docs/input.md @@ -143,7 +143,7 @@ Support for file URLs varies depending on type and provider: | Model | Send URL directly | Download and send bytes | Unsupported | |-------|-------------------|-------------------------|-------------| -| [`OpenAIChatModel`][pydantic_ai.models.openai.OpenAIChatModel] | `ImageUrl` | `AudioUrl`, `DocumentUrl` | `VideoUrl`. `DocumentUrl` [not supported with `AzureProvider`](models/openai.md#using-azure-with-the-responses-api) | +| [`OpenAIChatModel`][pydantic_ai.models.openai.OpenAIChatModel] | `ImageUrl` | `AudioUrl`, `DocumentUrl` | `VideoUrl`. `DocumentUrl` [not supported with `AzureProvider`](models/openai.md#using-azure-with-the-responses-api) or [`AlibabaProvider`](models/openai.md#alibaba-cloud-model-studio-dashscope) | | [`OpenAIResponsesModel`][pydantic_ai.models.openai.OpenAIResponsesModel] | `ImageUrl`, `AudioUrl`, `DocumentUrl` | — | `VideoUrl` | | [`AnthropicModel`][pydantic_ai.models.anthropic.AnthropicModel] | `ImageUrl`, `DocumentUrl` (PDF) | `DocumentUrl` (`text/plain`) | `AudioUrl`, `VideoUrl` | | [`GoogleModel`][pydantic_ai.models.google.GoogleModel] (Google Cloud) | All URL types | — | — | diff --git a/docs/models/openai.md b/docs/models/openai.md index 6617f2c845..53f73350c9 100644 --- a/docs/models/openai.md +++ b/docs/models/openai.md @@ -467,6 +467,9 @@ agent = Agent(model) ... ``` +!!! note "Document input is not supported" + The DashScope compatible-mode Chat Completions API does not accept document content parts, so passing a [`DocumentUrl`][pydantic_ai.messages.DocumentUrl] or document [`BinaryContent`][pydantic_ai.messages.BinaryContent] to an [`OpenAIChatModel`][pydantic_ai.models.openai.OpenAIChatModel] backed by [`AlibabaProvider`][pydantic_ai.providers.alibaba.AlibabaProvider] raises a `UserError`. + ### Ollama See [Ollama](ollama.md) for dedicated Ollama documentation, including structured output and Ollama Cloud limitations. diff --git a/pydantic_ai_slim/pydantic_ai/providers/alibaba.py b/pydantic_ai_slim/pydantic_ai/providers/alibaba.py index e02564c7f1..96a64b08a9 100644 --- a/pydantic_ai_slim/pydantic_ai/providers/alibaba.py +++ b/pydantic_ai_slim/pydantic_ai/providers/alibaba.py @@ -41,8 +41,13 @@ def client(self) -> AsyncOpenAI: def model_profile(model_name: str) -> ModelProfile | None: base_profile = qwen_model_profile(model_name) - # Wrap/merge into OpenAIModelProfile - openai_profile = OpenAIModelProfile(json_schema_transformer=OpenAIJsonSchemaTransformer).update(base_profile) + # Wrap/merge into OpenAIModelProfile. + # Alibaba's compatible-mode Chat Completions API rejects OpenAI `type:file` content parts, + # so document input must fail client-side with a clear UserError. + openai_profile = OpenAIModelProfile( + json_schema_transformer=OpenAIJsonSchemaTransformer, + openai_chat_supports_document_input=False, + ).update(base_profile) # For Qwen Omni models, force URI audio input encoding if 'omni' in model_name.lower(): diff --git a/tests/providers/test_alibaba_provider.py b/tests/providers/test_alibaba_provider.py index 71d6ffdb8f..72cf620f9c 100644 --- a/tests/providers/test_alibaba_provider.py +++ b/tests/providers/test_alibaba_provider.py @@ -1,7 +1,9 @@ import httpx import pytest +from pydantic_ai import Agent from pydantic_ai.exceptions import UserError +from pydantic_ai.messages import BinaryContent, DocumentUrl from pydantic_ai.profiles.openai import OpenAIModelProfile from ..conftest import TestEnv, try_import @@ -9,10 +11,14 @@ with try_import() as imports_successful: import openai + from pydantic_ai.models.openai import OpenAIChatModel from pydantic_ai.providers import infer_provider from pydantic_ai.providers.alibaba import AlibabaProvider -pytestmark = pytest.mark.skipif(not imports_successful(), reason='openai not installed') +pytestmark = [ + pytest.mark.skipif(not imports_successful(), reason='openai not installed'), + pytest.mark.anyio, +] def test_alibaba_provider_init(): @@ -64,6 +70,8 @@ def test_qwen_omni_profile_audio_uri(): profile = provider.model_profile('qwen-omni-turbo') assert isinstance(profile, OpenAIModelProfile) assert profile.openai_chat_audio_input_encoding == 'uri' + # The document-input flag must survive the omni branch's `.update()` merge. + assert profile.openai_chat_supports_document_input is False def test_qwen_non_omni_profile_default(): @@ -92,3 +100,26 @@ def test_alibaba_provider_custom_base_url(): provider = AlibabaProvider(api_key='test-key', base_url='https://custom.endpoint.com/v1') assert provider.base_url == 'https://custom.endpoint.com/v1' assert str(provider.client.base_url).rstrip('/') == 'https://custom.endpoint.com/v1' + + +async def test_alibaba_document_input_not_supported(allow_model_requests: None): + provider = AlibabaProvider(api_key='test-key') + model = OpenAIChatModel(model_name='qwen-max', provider=provider) + agent = Agent(model) + + with pytest.raises(UserError, match='alibaba.*does not support document input'): + await agent.run( + [ + 'Summarize this document', + BinaryContent(data=b'%PDF-1.4 test', media_type='application/pdf'), + ] + ) + + +async def test_alibaba_document_url_input_not_supported(allow_model_requests: None): + provider = AlibabaProvider(api_key='test-key') + model = OpenAIChatModel(model_name='qwen-max', provider=provider) + agent = Agent(model) + + with pytest.raises(UserError, match='alibaba.*does not support document input'): + await agent.run(['Summarize this document', DocumentUrl(url='https://example.com/test.pdf')])