From e78ae0eb8b26e2ad69956a215bc27b338dd8546c Mon Sep 17 00:00:00 2001
From: FuturMix <dev@futurmix.ai>
Date: Sat, 25 Apr 2026 18:33:09 +0800
Subject: [PATCH] feat: add FuturMix AI Gateway model templates
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add model template configuration files for FuturMix (https://futurmix.ai),
an OpenAI-compatible AI gateway that provides unified access to 22+ models
from OpenAI, Anthropic, and Google through a single endpoint.

Templates added:
- model_template_futurmix.yaml — GPT-4o (default)
- model_template_futurmix_claude-sonnet-4.yaml — Claude Sonnet 4
- model_template_futurmix_gemini-2.0-flash.yaml — Gemini 2.0 Flash

All templates use protocol: openai since FuturMix exposes an
OpenAI-compatible /v1/chat/completions endpoint, matching the
recommendation in issue #51 for third-party gateway integration.

Relates to #51

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../template/model_template_futurmix.yaml     | 166 ++++++++++++++++++
 ...del_template_futurmix_claude-sonnet-4.yaml | 166 ++++++++++++++++++
 ...el_template_futurmix_gemini-2.0-flash.yaml | 166 ++++++++++++++++++
 3 files changed, 498 insertions(+)
 create mode 100644 backend/conf/model/template/model_template_futurmix.yaml
 create mode 100644 backend/conf/model/template/model_template_futurmix_claude-sonnet-4.yaml
 create mode 100644 backend/conf/model/template/model_template_futurmix_gemini-2.0-flash.yaml

diff --git a/backend/conf/model/template/model_template_futurmix.yaml b/backend/conf/model/template/model_template_futurmix.yaml
new file mode 100644
index 0000000000..abaec10cc0
--- /dev/null
+++ b/backend/conf/model/template/model_template_futurmix.yaml
@@ -0,0 +1,166 @@
+id: 70010  # NOTE(review): presumably must be unique across all model templates - confirm 70010 is not already taken
+name: GPT-4o  # the model actually requested is meta.conn_config.model ("gpt-4o")
+icon_uri: default_icon/openai_v2.png
+icon_url: ""
+description:  # localized text (zh / en); it describes the gateway as a whole rather than GPT-4o
+    zh: FuturMix AI Gateway — 通过一个 OpenAI 兼容端点访问 GPT、Claude、Gemini 等 22+ 模型
+    en: FuturMix AI Gateway — access GPT, Claude, Gemini, and 22+ models through one OpenAI-compatible endpoint.
+default_parameters:  # one list entry per parameter: label, desc, type, bounds, defaults, widget style
+    - name: temperature
+      label:
+        zh: 生成随机性
+        en: Temperature
+      desc:  # NOTE(review): single-quoted YAML does no escape processing, so \n and \" below stay literal backslash sequences - confirm the UI unescapes them
+        zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性，反之，降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与"Top p"同时调整。'
+        en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
+      type: float
+      min: "0"  # bounds are quoted, so YAML loads them as strings
+      max: "1"
+      default_val:  # named presets (balance / creative / precise) next to the generic default_val
+        balance: "0.8"
+        creative: "1"
+        default_val: "1.0"  # NOTE(review): meta.conn_config.temperature in this file is 0.7 - confirm which default applies
+        precise: "0.3"
+      precision: 1  # presumably the number of decimal places for the value - confirm
+      options: []
+      style:
+        widget: slider
+        label:  # presumably the settings group this control is shown under - confirm
+            zh: 生成多样性
+            en: Generation diversity
+    - name: max_tokens  # integer slider limiting the reply length
+      label:
+        zh: 最大回复长度
+        en: Response max length
+      desc:
+        zh: 控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。
+        en: You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.
+      type: int
+      min: "1"
+      max: "4096"  # NOTE(review): lower than meta.capability.output_tokens in this file - confirm the 4096 cap is intentional
+      default_val:
+        default_val: "4096"  # same value as meta.conn_config.max_tokens in this file
+      options: []
+      style:
+        widget: slider
+        label:
+            zh: 输入及输出设置
+            en: Input and output settings
+    - name: top_p  # float slider, range 0..1
+      label:
+        zh: Top P
+        en: Top P
+      desc:  # NOTE(review): the \n sequences in the single-quoted en text are literal backslash-n, not newlines - confirm the UI unescapes them
+        zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择，直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇，从而控制输出内容的多样性。建议不要与"生成随机性"同时调整。'
+        en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
+      type: float
+      min: "0"
+      max: "1"
+      default_val:
+        default_val: "0.7"  # NOTE(review): meta.conn_config.top_p in this file is 1 - confirm which default applies
+      precision: 2
+      options: []
+      style:
+        widget: slider
+        label:
+            zh: 生成多样性
+            en: Generation diversity
+    - name: frequency_penalty  # float slider, range -2..2, default 0
+      label:
+        zh: 重复语句惩罚
+        en: Frequency penalty
+      desc:
+        zh: '- **frequency penalty**: 当该值为正时，会阻止模型频繁使用相同的词汇和短语，从而增加输出内容的多样性。'
+        en: '**Frequency Penalty**: When positive, it discourages the model from repeating the same words and phrases, thereby increasing the diversity of the output.'
+      type: float
+      min: "-2"  # quoted, so the negative bound is loaded as a string
+      max: "2"
+      default_val:
+        default_val: "0"  # same value as meta.conn_config.frequency_penalty in this file
+      precision: 2
+      options: []
+      style:
+        widget: slider
+        label:
+            zh: 生成多样性
+            en: Generation diversity
+    - name: presence_penalty  # float slider, range -2..2, default 0
+      label:
+        zh: 重复主题惩罚
+        en: Presence penalty
+      desc:
+        zh: '- **presence penalty**: 当该值为正时，会阻止模型频繁讨论相同的主题，从而增加输出内容的多样性'
+        en: '**Presence Penalty**: When positive, it prevents the model from discussing the same topics repeatedly, thereby increasing the diversity of the output.'
+      type: float
+      min: "-2"  # quoted, so the negative bound is loaded as a string
+      max: "2"
+      default_val:
+        default_val: "0"  # same value as meta.conn_config.presence_penalty in this file
+      precision: 2
+      options: []
+      style:
+        widget: slider
+        label:
+            zh: 生成多样性
+            en: Generation diversity
+    - name: response_format  # radio-button choice between the option codes listed below
+      label:
+        zh: 输出格式
+        en: Response format
+      desc:  # NOTE(review): \n inside these single-quoted strings is a literal backslash-n - confirm the UI unescapes it
+        zh: '- **文本**: 使用普通文本格式回复\n- **Markdown**: 将引导模型使用Markdown格式输出回复\n- **JSON**: 将引导模型使用JSON格式输出'
+        en: '**Response Format**:\n\n- **Text**: Replies in plain text format\n- **Markdown**: Uses Markdown format for replies\n- **JSON**: Uses JSON format for replies'
+      type: int
+      min: ""  # no numeric bounds; the value is one of the option codes
+      max: ""
+      default_val:
+        default_val: "0"  # the code of the "Text" option
+      options:  # codes are quoted strings: 0 = Text, 1 = Markdown, 2 = JSON
+        - label: Text
+          value: "0"
+        - label: Markdown
+          value: "1"
+        - label: JSON
+          value: "2"  # NOTE(review): meta.capability.json_mode is false in this file - confirm the JSON option still works
+      style:
+        widget: radio_buttons
+        label:
+            zh: 输入及输出设置
+            en: Input and output settings
+meta:  # protocol, declared model capabilities and connection settings for this template
+    protocol: openai  # openai protocol pointed at the base_url below
+    capability:
+        function_call: true
+        input_modal:  # accepted input modalities
+            - text
+            - image
+        input_tokens: 128000
+        json_mode: false
+        max_tokens: 128000  # same value as input_tokens; presumably the total context window - confirm
+        output_modal:
+            - text
+        output_tokens: 16384
+        prefix_caching: false
+        reasoning: false
+        prefill_response: false
+    conn_config:
+        base_url: "https://futurmix.ai/v1"  # FuturMix gateway endpoint
+        api_key: ""  # empty in the template; presumably filled in per deployment - confirm
+        timeout: 0s  # NOTE(review): presumably 0s means no explicit timeout - confirm
+        model: "gpt-4o"  # model identifier sent to the gateway
+        temperature: 0.7
+        frequency_penalty: 0
+        presence_penalty: 0
+        max_tokens: 4096
+        max_completion_tokens: 4096  # same limit as max_tokens above
+        top_p: 1
+        top_k: 0
+        stop: []
+        openai:  # settings specific to the openai protocol
+            by_azure: false
+            api_version: ""
+            response_format:
+                type: text
+                jsonschema: null
+        custom: {}
+    status: 0
diff --git a/backend/conf/model/template/model_template_futurmix_claude-sonnet-4.yaml b/backend/conf/model/template/model_template_futurmix_claude-sonnet-4.yaml
new file mode 100644
index 0000000000..6b76e4659e
--- /dev/null
+++ b/backend/conf/model/template/model_template_futurmix_claude-sonnet-4.yaml
@@ -0,0 +1,166 @@
+id: 70020  # NOTE(review): presumably must be unique across all model templates - confirm 70020 is not already taken
+name: Claude Sonnet 4  # the model actually requested is meta.conn_config.model
+icon_uri: default_icon/claude_v2.png  # vendor icon, so a Claude model is not shown with the OpenAI logo; NOTE(review): confirm the asset exists
+icon_url: ""
+description:  # localized text (zh / en)
+    zh: 通过 FuturMix AI Gateway 使用 Claude Sonnet 4 模型（OpenAI 兼容格式）
+    en: Claude Sonnet 4 via FuturMix AI Gateway (OpenAI-compatible format).
+default_parameters:  # one list entry per parameter: label, desc, type, bounds, defaults, widget style
+    - name: temperature
+      label:
+        zh: 生成随机性
+        en: Temperature
+      desc:  # NOTE(review): single-quoted YAML does no escape processing, so \n and \" below stay literal backslash sequences - confirm the UI unescapes them
+        zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性，反之，降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与"Top p"同时调整。'
+        en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
+      type: float
+      min: "0"  # bounds are quoted, so YAML loads them as strings
+      max: "1"
+      default_val:  # named presets (balance / creative / precise) next to the generic default_val
+        balance: "0.8"
+        creative: "1"
+        default_val: "1.0"  # NOTE(review): meta.conn_config.temperature in this file is 0.7 - confirm which default applies
+        precise: "0.3"
+      precision: 1  # presumably the number of decimal places for the value - confirm
+      options: []
+      style:
+        widget: slider
+        label:  # presumably the settings group this control is shown under - confirm
+            zh: 生成多样性
+            en: Generation diversity
+    - name: max_tokens  # integer slider limiting the reply length
+      label:
+        zh: 最大回复长度
+        en: Response max length
+      desc:
+        zh: 控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。
+        en: You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.
+      type: int
+      min: "1"
+      max: "4096"  # NOTE(review): lower than meta.capability.output_tokens in this file - confirm the 4096 cap is intentional
+      default_val:
+        default_val: "4096"  # same value as meta.conn_config.max_tokens in this file
+      options: []
+      style:
+        widget: slider
+        label:
+            zh: 输入及输出设置
+            en: Input and output settings
+    - name: top_p  # float slider, range 0..1
+      label:
+        zh: Top P
+        en: Top P
+      desc:  # NOTE(review): the \n sequences in the single-quoted en text are literal backslash-n, not newlines - confirm the UI unescapes them
+        zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择，直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇，从而控制输出内容的多样性。建议不要与"生成随机性"同时调整。'
+        en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
+      type: float
+      min: "0"
+      max: "1"
+      default_val:
+        default_val: "0.7"  # NOTE(review): meta.conn_config.top_p in this file is 1 - confirm which default applies
+      precision: 2
+      options: []
+      style:
+        widget: slider
+        label:
+            zh: 生成多样性
+            en: Generation diversity
+    - name: frequency_penalty  # NOTE(review): Anthropic's native API has no frequency_penalty - confirm the gateway accepts or ignores it for this model
+      label:
+        zh: 重复语句惩罚
+        en: Frequency penalty
+      desc:
+        zh: '- **frequency penalty**: 当该值为正时，会阻止模型频繁使用相同的词汇和短语，从而增加输出内容的多样性。'
+        en: '**Frequency Penalty**: When positive, it discourages the model from repeating the same words and phrases, thereby increasing the diversity of the output.'
+      type: float
+      min: "-2"  # quoted, so the negative bound is loaded as a string
+      max: "2"
+      default_val:
+        default_val: "0"  # same value as meta.conn_config.frequency_penalty in this file
+      precision: 2
+      options: []
+      style:
+        widget: slider
+        label:
+            zh: 生成多样性
+            en: Generation diversity
+    - name: presence_penalty  # NOTE(review): Anthropic's native API has no presence_penalty - confirm the gateway accepts or ignores it for this model
+      label:
+        zh: 重复主题惩罚
+        en: Presence penalty
+      desc:
+        zh: '- **presence penalty**: 当该值为正时，会阻止模型频繁讨论相同的主题，从而增加输出内容的多样性'
+        en: '**Presence Penalty**: When positive, it prevents the model from discussing the same topics repeatedly, thereby increasing the diversity of the output.'
+      type: float
+      min: "-2"  # quoted, so the negative bound is loaded as a string
+      max: "2"
+      default_val:
+        default_val: "0"  # same value as meta.conn_config.presence_penalty in this file
+      precision: 2
+      options: []
+      style:
+        widget: slider
+        label:
+            zh: 生成多样性
+            en: Generation diversity
+    - name: response_format  # radio-button choice between the option codes listed below
+      label:
+        zh: 输出格式
+        en: Response format
+      desc:  # NOTE(review): \n inside these single-quoted strings is a literal backslash-n - confirm the UI unescapes it
+        zh: '- **文本**: 使用普通文本格式回复\n- **Markdown**: 将引导模型使用Markdown格式输出回复\n- **JSON**: 将引导模型使用JSON格式输出'
+        en: '**Response Format**:\n\n- **Text**: Replies in plain text format\n- **Markdown**: Uses Markdown format for replies\n- **JSON**: Uses JSON format for replies'
+      type: int
+      min: ""  # no numeric bounds; the value is one of the option codes
+      max: ""
+      default_val:
+        default_val: "0"  # the code of the "Text" option
+      options:  # codes are quoted strings: 0 = Text, 1 = Markdown, 2 = JSON
+        - label: Text
+          value: "0"
+        - label: Markdown
+          value: "1"
+        - label: JSON
+          value: "2"  # NOTE(review): meta.capability.json_mode is false in this file - confirm the JSON option still works
+      style:
+        widget: radio_buttons
+        label:
+            zh: 输入及输出设置
+            en: Input and output settings
+meta:  # protocol, declared model capabilities and connection settings for this template
+    protocol: openai  # openai protocol pointed at the base_url below
+    capability:
+        function_call: true
+        input_modal:  # accepted input modalities
+            - text
+            - image
+        input_tokens: 200000
+        json_mode: false
+        max_tokens: 200000  # same value as input_tokens; presumably the total context window - confirm
+        output_modal:
+            - text
+        output_tokens: 64000  # Claude Sonnet 4's documented max output; 16384 was GPT-4o's limit. NOTE(review): confirm the gateway does not cap lower
+        prefix_caching: false
+        reasoning: false
+        prefill_response: false
+    conn_config:
+        base_url: "https://futurmix.ai/v1"  # FuturMix gateway endpoint
+        api_key: ""  # empty in the template; presumably filled in per deployment - confirm
+        timeout: 0s  # NOTE(review): presumably 0s means no explicit timeout - confirm
+        model: "claude-sonnet-4-20250514"  # model identifier sent to the gateway
+        temperature: 0.7
+        frequency_penalty: 0
+        presence_penalty: 0
+        max_tokens: 4096
+        max_completion_tokens: 4096  # same limit as max_tokens above
+        top_p: 1
+        top_k: 0
+        stop: []
+        openai:  # settings specific to the openai protocol
+            by_azure: false
+            api_version: ""
+            response_format:
+                type: text
+                jsonschema: null
+        custom: {}
+    status: 0
diff --git a/backend/conf/model/template/model_template_futurmix_gemini-2.0-flash.yaml b/backend/conf/model/template/model_template_futurmix_gemini-2.0-flash.yaml
new file mode 100644
index 0000000000..feff567965
--- /dev/null
+++ b/backend/conf/model/template/model_template_futurmix_gemini-2.0-flash.yaml
@@ -0,0 +1,166 @@
+id: 70030  # NOTE(review): presumably must be unique across all model templates - confirm 70030 is not already taken
+name: Gemini 2.0 Flash  # the model actually requested is meta.conn_config.model
+icon_uri: default_icon/gemini_v2.png  # vendor icon, so a Gemini model is not shown with the OpenAI logo; NOTE(review): confirm the asset exists
+icon_url: ""
+description:  # localized text (zh / en)
+    zh: 通过 FuturMix AI Gateway 使用 Gemini 2.0 Flash 模型（OpenAI 兼容格式）
+    en: Gemini 2.0 Flash via FuturMix AI Gateway (OpenAI-compatible format).
+default_parameters:  # one list entry per parameter: label, desc, type, bounds, defaults, widget style
+    - name: temperature
+      label:
+        zh: 生成随机性
+        en: Temperature
+      desc:  # NOTE(review): single-quoted YAML does no escape processing, so \n and \" below stay literal backslash sequences - confirm the UI unescapes them
+        zh: '- **temperature**: 调高温度会使得模型的输出更多样性和创新性，反之，降低温度会使输出内容更加遵循指令要求但减少多样性。建议不要与"Top p"同时调整。'
+        en: '**Temperature**:\n\n- When you increase this value, the model outputs more diverse and innovative content; when you decrease it, the model outputs less diverse content that strictly follows the given instructions.\n- It is recommended not to adjust this value with \"Top p\" at the same time.'
+      type: float
+      min: "0"  # bounds are quoted, so YAML loads them as strings
+      max: "1"
+      default_val:  # named presets (balance / creative / precise) next to the generic default_val
+        balance: "0.8"
+        creative: "1"
+        default_val: "1.0"  # NOTE(review): meta.conn_config.temperature in this file is 0.7 - confirm which default applies
+        precise: "0.3"
+      precision: 1  # presumably the number of decimal places for the value - confirm
+      options: []
+      style:
+        widget: slider
+        label:  # presumably the settings group this control is shown under - confirm
+            zh: 生成多样性
+            en: Generation diversity
+    - name: max_tokens  # integer slider limiting the reply length
+      label:
+        zh: 最大回复长度
+        en: Response max length
+      desc:
+        zh: 控制模型输出的Tokens 长度上限。通常 100 Tokens 约等于 150 个中文汉字。
+        en: You can specify the maximum length of the tokens output through this value. Typically, 100 tokens are approximately equal to 150 Chinese characters.
+      type: int
+      min: "1"
+      max: "4096"  # NOTE(review): lower than meta.capability.output_tokens in this file - confirm the 4096 cap is intentional
+      default_val:
+        default_val: "4096"  # same value as meta.conn_config.max_tokens in this file
+      options: []
+      style:
+        widget: slider
+        label:
+            zh: 输入及输出设置
+            en: Input and output settings
+    - name: top_p  # float slider, range 0..1
+      label:
+        zh: Top P
+        en: Top P
+      desc:  # NOTE(review): the \n sequences in the single-quoted en text are literal backslash-n, not newlines - confirm the UI unescapes them
+        zh: '- **Top p 为累计概率**: 模型在生成输出时会从概率最高的词汇开始选择，直到这些词汇的总概率累积达到Top p 值。这样可以限制模型只选择这些高概率的词汇，从而控制输出内容的多样性。建议不要与"生成随机性"同时调整。'
+        en: '**Top P**:\n\n- An alternative to sampling with temperature, where only tokens within the top p probability mass are considered. For example, 0.1 means only the top 10% probability mass tokens are considered.\n- We recommend altering this or temperature, but not both.'
+      type: float
+      min: "0"
+      max: "1"
+      default_val:
+        default_val: "0.7"  # NOTE(review): meta.conn_config.top_p in this file is 1 - confirm which default applies
+      precision: 2
+      options: []
+      style:
+        widget: slider
+        label:
+            zh: 生成多样性
+            en: Generation diversity
+    - name: frequency_penalty  # float slider, range -2..2, default 0
+      label:
+        zh: 重复语句惩罚
+        en: Frequency penalty
+      desc:
+        zh: '- **frequency penalty**: 当该值为正时，会阻止模型频繁使用相同的词汇和短语，从而增加输出内容的多样性。'
+        en: '**Frequency Penalty**: When positive, it discourages the model from repeating the same words and phrases, thereby increasing the diversity of the output.'
+      type: float
+      min: "-2"  # quoted, so the negative bound is loaded as a string
+      max: "2"
+      default_val:
+        default_val: "0"  # same value as meta.conn_config.frequency_penalty in this file
+      precision: 2
+      options: []
+      style:
+        widget: slider
+        label:
+            zh: 生成多样性
+            en: Generation diversity
+    - name: presence_penalty  # float slider, range -2..2, default 0
+      label:
+        zh: 重复主题惩罚
+        en: Presence penalty
+      desc:
+        zh: '- **presence penalty**: 当该值为正时，会阻止模型频繁讨论相同的主题，从而增加输出内容的多样性'
+        en: '**Presence Penalty**: When positive, it prevents the model from discussing the same topics repeatedly, thereby increasing the diversity of the output.'
+      type: float
+      min: "-2"  # quoted, so the negative bound is loaded as a string
+      max: "2"
+      default_val:
+        default_val: "0"  # same value as meta.conn_config.presence_penalty in this file
+      precision: 2
+      options: []
+      style:
+        widget: slider
+        label:
+            zh: 生成多样性
+            en: Generation diversity
+    - name: response_format  # radio-button choice between the option codes listed below
+      label:
+        zh: 输出格式
+        en: Response format
+      desc:  # NOTE(review): \n inside these single-quoted strings is a literal backslash-n - confirm the UI unescapes it
+        zh: '- **文本**: 使用普通文本格式回复\n- **Markdown**: 将引导模型使用Markdown格式输出回复\n- **JSON**: 将引导模型使用JSON格式输出'
+        en: '**Response Format**:\n\n- **Text**: Replies in plain text format\n- **Markdown**: Uses Markdown format for replies\n- **JSON**: Uses JSON format for replies'
+      type: int
+      min: ""  # no numeric bounds; the value is one of the option codes
+      max: ""
+      default_val:
+        default_val: "0"  # the code of the "Text" option
+      options:  # codes are quoted strings: 0 = Text, 1 = Markdown, 2 = JSON
+        - label: Text
+          value: "0"
+        - label: Markdown
+          value: "1"
+        - label: JSON
+          value: "2"  # NOTE(review): meta.capability.json_mode is false in this file - confirm the JSON option still works
+      style:
+        widget: radio_buttons
+        label:
+            zh: 输入及输出设置
+            en: Input and output settings
+meta:  # protocol, declared model capabilities and connection settings for this template
+    protocol: openai  # openai protocol pointed at the base_url below
+    capability:
+        function_call: true
+        input_modal:  # accepted input modalities
+            - text
+            - image
+        input_tokens: 1048576
+        json_mode: false
+        max_tokens: 1048576  # same value as input_tokens; presumably the total context window - confirm
+        output_modal:
+            - text
+        output_tokens: 8192
+        prefix_caching: false
+        reasoning: false
+        prefill_response: false
+    conn_config:
+        base_url: "https://futurmix.ai/v1"  # FuturMix gateway endpoint
+        api_key: ""  # empty in the template; presumably filled in per deployment - confirm
+        timeout: 0s  # NOTE(review): presumably 0s means no explicit timeout - confirm
+        model: "gemini-2.0-flash"  # model identifier sent to the gateway
+        temperature: 0.7
+        frequency_penalty: 0
+        presence_penalty: 0
+        max_tokens: 4096
+        max_completion_tokens: 4096  # same limit as max_tokens above
+        top_p: 1
+        top_k: 0
+        stop: []
+        openai:  # settings specific to the openai protocol
+            by_azure: false
+            api_version: ""
+            response_format:
+                type: text
+                jsonschema: null
+        custom: {}
+    status: 0