From e56324e58af8d15efc818d6d37c33cc999137635 Mon Sep 17 00:00:00 2001 From: Ace Eldeib Date: Tue, 19 May 2026 16:36:24 +0200 Subject: [PATCH] Update W&B active model catalog Use the authenticated W&B /v1/models endpoint as the active model set while keeping Artificial Analysis as the metadata source. Add explicit probe/manual overrides for active models missing AA metadata and refresh the W&B TOMLs to match the 26 enabled models. --- packages/core/script/generate-wandb.ts | 288 ++++++++++++++++-- .../wandb/models/MiniMaxAI/MiniMax-M2.5.toml | 5 +- .../models/OpenPipe/Qwen3-14B-Instruct.toml | 3 +- .../Qwen/Qwen3-235B-A22B-Instruct-2507.toml | 3 +- .../Qwen/Qwen3-235B-A22B-Thinking-2507.toml | 3 +- .../Qwen/Qwen3-30B-A3B-Instruct-2507.toml | 3 +- .../Qwen/Qwen3-Coder-480B-A35B-Instruct.toml | 3 +- providers/wandb/models/Qwen/Qwen3.5-27B.toml | 23 ++ .../wandb/models/Qwen/Qwen3.5-35B-A3B.toml | 23 ++ providers/wandb/models/Qwen/Qwen3.6-27B.toml | 23 ++ .../wandb/models/Qwen/Qwen3.6-35B-A3B.toml | 23 ++ .../models/deepseek-ai/DeepSeek-V3.1.toml | 3 +- .../models/deepseek-ai/DeepSeek-V4-Flash.toml | 23 ++ .../models/deepseek-ai/DeepSeek-V4-Pro.toml | 23 ++ .../wandb/models/google/gemma-4-31B-it.toml | 23 ++ .../granite-4.1-8b.toml} | 17 +- .../meta-llama/Llama-3.1-70B-Instruct.toml | 3 +- .../meta-llama/Llama-3.1-8B-Instruct.toml | 5 +- .../meta-llama/Llama-3.3-70B-Instruct.toml | 5 +- .../Llama-4-Scout-17B-16E-Instruct.toml | 10 +- .../models/microsoft/Phi-4-mini-instruct.toml | 5 +- .../wandb/models/moonshotai/Kimi-K2.5.toml | 7 +- .../wandb/models/moonshotai/Kimi-K2.6.toml | 23 ++ ...NVIDIA-Nemotron-3-Super-120B-A12B-FP8.toml | 5 +- .../wandb/models/openai/gpt-oss-120b.toml | 7 +- .../wandb/models/openai/gpt-oss-20b.toml | 7 +- providers/wandb/models/zai-org/GLM-5.1.toml | 28 +- 27 files changed, 525 insertions(+), 69 deletions(-) create mode 100644 providers/wandb/models/Qwen/Qwen3.5-27B.toml create mode 100644 providers/wandb/models/Qwen/Qwen3.5-35B-A3B.toml create mode 100644 providers/wandb/models/Qwen/Qwen3.6-27B.toml create mode 100644 providers/wandb/models/Qwen/Qwen3.6-35B-A3B.toml create mode 100644 providers/wandb/models/deepseek-ai/DeepSeek-V4-Flash.toml create mode 100644 providers/wandb/models/deepseek-ai/DeepSeek-V4-Pro.toml create mode 100644 providers/wandb/models/google/gemma-4-31B-it.toml rename providers/wandb/models/{zai-org/GLM-5-FP8.toml => ibm-granite/granite-4.1-8b.toml} (50%) create mode 100644 providers/wandb/models/moonshotai/Kimi-K2.6.toml diff --git a/packages/core/script/generate-wandb.ts b/packages/core/script/generate-wandb.ts index 385f235c2..d8c548e72 100644 --- a/packages/core/script/generate-wandb.ts +++ b/packages/core/script/generate-wandb.ts @@ -1,11 +1,12 @@ #!/usr/bin/env bun import path from "node:path"; -import { mkdir } from "node:fs/promises"; +import { mkdir, rm } from "node:fs/promises"; import { z } from "zod"; import { ModelFamilyValues } from "../src/family.js"; -const API_ENDPOINT = "https://trace.wandb.ai/inference/analysis/artificialanalysis/models"; +const ACTIVE_MODELS_ENDPOINT = "https://api.inference.wandb.ai/v1/models"; +const METADATA_ENDPOINT = "https://trace.wandb.ai/inference/analysis/artificialanalysis/models"; const Pricing = z .object({ @@ -13,6 +14,8 @@ const Pricing = z completion: z.string().optional(), image: z.string().optional(), request: z.string().optional(), + input_cache_read: z.string().optional(), + input_cache_write: z.string().optional(), input_cache_reads: z.string().optional(), input_cache_writes: z.string().optional(), }) @@ -39,6 +42,12 @@ const WandbResponse = z }) .strict(); +const ActiveModelsResponse = z + .object({ + data: z.array(z.object({ id: z.string() }).passthrough()), + }) + .passthrough(); + interface ExistingModel { name?: string; family?: string; @@ -100,6 +109,10 @@ interface MergedModel { }; } +type ManualModel = Omit & { + last_updated?: string; +}; + interface Changes { field: string; oldValue: string; @@ -120,16 +133,140 @@ const modalityMap: Record = { const openWeightsPrefixes = new Set([ "deepseek-ai/", + "google/", + "ibm-granite/", "meta-llama/", "microsoft/", "MiniMaxAI/", "moonshotai/", "nvidia/", "OpenPipe/", + "openai/gpt-oss-", "Qwen/", "zai-org/", ]); +// W&B's Artificial Analysis metadata feed does not expose reasoning support. +// These IDs were verified on 2026-05-19 with live /v1/chat/completions probes +// against an authenticated W&B Inference project: the response message included +// a non-empty `reasoning` string for a basic text prompt. +const probedReasoningModelIds = new Set([ + "MiniMaxAI/MiniMax-M2.5", + "Qwen/Qwen3-235B-A22B-Thinking-2507", + "Qwen/Qwen3.5-27B", + "Qwen/Qwen3.5-35B-A3B", + "Qwen/Qwen3.6-27B", + "Qwen/Qwen3.6-35B-A3B", + "google/gemma-4-31B-it", + "moonshotai/Kimi-K2.5", + "moonshotai/Kimi-K2.6", + "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8", + "openai/gpt-oss-120b", + "openai/gpt-oss-20b", + "zai-org/GLM-5.1", +]); + +// These IDs were probed in the same run and returned no reasoning payload +// (`message.reasoning` was null) for the basic text prompt. +const probedNoReasoningModelIds = new Set([ + "OpenPipe/Qwen3-14B-Instruct", + "Qwen/Qwen3-235B-A22B-Instruct-2507", + "Qwen/Qwen3-30B-A3B-Instruct-2507", + "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "deepseek-ai/DeepSeek-V3.1", + "deepseek-ai/DeepSeek-V4-Flash", + "deepseek-ai/DeepSeek-V4-Pro", + "ibm-granite/granite-4.1-8b", + "meta-llama/Llama-3.1-70B-Instruct", + "meta-llama/Llama-3.1-8B-Instruct", + "meta-llama/Llama-3.3-70B-Instruct", + "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "microsoft/Phi-4-mini-instruct", +]); + +// Active /v1 models that are missing from the W&B Artificial Analysis metadata +// feed. Functionality flags here were verified on 2026-05-19 with live +// /v1/chat/completions probes. Pricing, limits, release dates, and knowledge +// cutoffs are manually sourced from existing models.dev entries for the same +// model/provider family and preserved here so the generator can cover the full +// active /v1/models list without inventing metadata. +const manualModelOverrides: Record = { + "meta-llama/Llama-4-Scout-17B-16E-Instruct": { + name: "Llama 4 Scout 17B 16E Instruct", + family: "llama", + release_date: "2025-04-05", + attachment: true, + reasoning: false, + structured_output: true, + temperature: true, + tool_call: true, + knowledge: "2024-08", + open_weights: true, + cost: { + input: 0.17, + output: 0.66, + }, + limit: { + context: 64_000, + output: 64_000, + }, + modalities: { + input: ["text", "image"], + output: ["text"], + }, + }, + "moonshotai/Kimi-K2.5": { + name: "Kimi K2.5", + family: "kimi", + release_date: "2026-01-27", + attachment: true, + reasoning: true, + structured_output: true, + temperature: true, + tool_call: true, + open_weights: true, + interleaved: true, + cost: { + input: 0.5, + output: 2.85, + }, + limit: { + context: 262_144, + output: 262_144, + }, + modalities: { + input: ["text", "image"], + output: ["text"], + }, + }, + "zai-org/GLM-5.1": { + name: "GLM-5.1", + family: "glm", + release_date: "2026-03-27", + attachment: false, + reasoning: true, + structured_output: true, + temperature: true, + tool_call: true, + open_weights: false, + interleaved: true, + cost: { + input: 1.4, + output: 4.4, + cache_read: 0.26, + cache_write: 0, + }, + limit: { + context: 200_000, + output: 131_072, + }, + modalities: { + input: ["text"], + output: ["text"], + }, + }, +}; + function timestampToDate(timestamp: number): string { return new Date(timestamp * 1000).toISOString().slice(0, 10); } @@ -195,10 +332,27 @@ function normalizeName(apiModel: z.infer): string { } function inferReasoning(apiModel: z.infer): boolean { + const verified = verifiedReasoning(apiModel.id); + if (verified !== undefined) { + return verified; + } + const text = `${apiModel.id} ${apiModel.name}`.toLowerCase(); return text.includes("thinking") || /\br1\b/.test(text) || text.includes("reasoning"); } +function verifiedReasoning(modelId: string): boolean | undefined { + if (probedReasoningModelIds.has(modelId)) { + return true; + } + + if (probedNoReasoningModelIds.has(modelId)) { + return false; + } + + return undefined; +} + function inferOpenWeights(modelId: string): boolean { for (const prefix of openWeightsPrefixes) { if (modelId.startsWith(prefix)) { @@ -245,12 +399,12 @@ function mergeModel( name: existing?.name ?? normalizeName(apiModel), family: existing?.family ?? inferFamily(apiModel.id, apiModel.name), attachment: existing?.attachment ?? inputModalities.some((m) => m !== "text"), - reasoning: existing?.reasoning ?? inferReasoning(apiModel), + reasoning: verifiedReasoning(apiModel.id) ?? existing?.reasoning ?? inferReasoning(apiModel), tool_call: existing?.tool_call ?? featureSet.has("tools"), temperature: existing?.temperature ?? samplingSet.has("temperature"), release_date: existing?.release_date ?? timestampToDate(apiModel.created), last_updated: getTodayDate(), - open_weights: existing?.open_weights ?? inferOpenWeights(apiModel.id), + open_weights: inferOpenWeights(apiModel.id) || (existing?.open_weights ?? false), ...(existing?.structured_output !== undefined ? { structured_output: existing.structured_output } : featureSet.has("structured_outputs") @@ -277,8 +431,8 @@ function mergeModel( const prompt = apiModel.pricing?.prompt; const completion = apiModel.pricing?.completion; - const cacheRead = apiModel.pricing?.input_cache_reads; - const cacheWrite = apiModel.pricing?.input_cache_writes; + const cacheRead = apiModel.pricing?.input_cache_read ?? apiModel.pricing?.input_cache_reads; + const cacheWrite = apiModel.pricing?.input_cache_write ?? apiModel.pricing?.input_cache_writes; if (prompt && completion) { merged.cost = { @@ -303,6 +457,14 @@ function mergeModel( return merged; } +function mergeManualModel(manual: ManualModel, existing: ExistingModel | null): MergedModel { + return { + ...manual, + last_updated: getTodayDate(), + ...(existing?.status ? { status: existing.status } : {}), + }; +} + function formatToml(model: MergedModel): string { const lines: string[] = []; @@ -406,7 +568,10 @@ function detectChanges(existing: ExistingModel | null, merged: MergedModel): Cha compare("structured_output", existing.structured_output, merged.structured_output); compare("temperature", existing.temperature, merged.temperature); compare("tool_call", existing.tool_call, merged.tool_call); + compare("knowledge", existing.knowledge, merged.knowledge); compare("open_weights", existing.open_weights, merged.open_weights); + compare("interleaved", existing.interleaved, merged.interleaved); + compare("status", existing.status, merged.status); compare("cost.input", existing.cost?.input, merged.cost?.input); compare("cost.output", existing.cost?.output, merged.cost?.output); compare("cost.cache_read", existing.cost?.cache_read, merged.cost?.cache_read); @@ -419,51 +584,107 @@ function detectChanges(existing: ExistingModel | null, merged: MergedModel): Cha return changes; } -async function main() { - const args = process.argv.slice(2); - const dryRun = args.includes("--dry-run"); - const newOnly = args.includes("--new-only"); +async function fetchMetadataModels(): Promise>> { + const res = await fetch(METADATA_ENDPOINT); + if (!res.ok) { + throw new Error(`Failed to fetch W&B metadata API: ${res.status} ${res.statusText}`); + } - const modelsDir = path.join(import.meta.dirname, "..", "..", "..", "providers", "wandb", "models"); + const json = await res.json(); + const parsed = WandbResponse.safeParse(json); + if (!parsed.success) { + parsed.error.cause = { endpoint: METADATA_ENDPOINT }; + throw parsed.error; + } + + return parsed.data.data; +} + +async function fetchActiveModelIds(): Promise { + const apiKey = process.env.WANDB_API_KEY; + const project = process.env.WANDB_INFERENCE_PROJECT ?? process.env.OPENAI_PROJECT; - console.log(`${dryRun ? "[DRY RUN] " : ""}${newOnly ? "[NEW ONLY] " : ""}Fetching WandB models from API...`); + if (!apiKey || !project) { + throw new Error( + "W&B active model sync requires WANDB_API_KEY and WANDB_INFERENCE_PROJECT (or OPENAI_PROJECT).", + ); + } + + const res = await fetch(ACTIVE_MODELS_ENDPOINT, { + headers: { + "Content-Type": "application/json", + "Authorization": `Bearer ${apiKey}`, + "OpenAI-Project": project, + }, + }); - const res = await fetch(API_ENDPOINT); if (!res.ok) { - console.error(`Failed to fetch API: ${res.status} ${res.statusText}`); - process.exit(1); + throw new Error(`Failed to fetch W&B active models API: ${res.status} ${res.statusText}`); } const json = await res.json(); - const parsed = WandbResponse.safeParse(json); + const parsed = ActiveModelsResponse.safeParse(json); if (!parsed.success) { - console.error("Invalid API response:", parsed.error.errors); - process.exit(1); + parsed.error.cause = { endpoint: ACTIVE_MODELS_ENDPOINT }; + throw parsed.error; + } + + return [...new Set(parsed.data.data.map((model) => model.id))].sort(); +} + +async function main() { + const args = process.argv.slice(2); + const dryRun = args.includes("--dry-run"); + const newOnly = args.includes("--new-only"); + + const modelsDir = path.join(import.meta.dirname, "..", "..", "..", "providers", "wandb", "models"); + + console.log(`${dryRun ? "[DRY RUN] " : ""}${newOnly ? "[NEW ONLY] " : ""}Fetching W&B active model list and metadata...`); + + const [activeModelIds, metadataModels] = await Promise.all([ + fetchActiveModelIds(), + fetchMetadataModels(), + ]); + const metadataById = new Map(metadataModels.map((model) => [model.id, model])); + const missingMetadata = activeModelIds.filter((id) => ( + !metadataById.has(id) && !(id in manualModelOverrides) + )); + + if (missingMetadata.length > 0) { + throw new Error( + `Active W&B models missing metadata or manual overrides: ${missingMetadata.join(", ")}`, + ); } - const apiModels = parsed.data.data; const existingFiles = new Set(); for await (const file of new Bun.Glob("**/*.toml").scan({ cwd: modelsDir, absolute: false })) { existingFiles.add(file); } - console.log(`Found ${apiModels.length} models in API, ${existingFiles.size} existing files\n`); + console.log( + `Found ${activeModelIds.length} active models, ${metadataModels.length} metadata models, ${existingFiles.size} existing files\n`, + ); const apiModelIds = new Set(); let created = 0; let updated = 0; + let deleted = 0; let unchanged = 0; - for (const apiModel of apiModels) { - const relativePath = `${apiModel.id}.toml`; + for (const modelId of activeModelIds) { + const relativePath = `${modelId}.toml`; const filePath = path.join(modelsDir, relativePath); const dirPath = path.dirname(filePath); apiModelIds.add(relativePath); const existing = await loadExistingModel(filePath); - const merged = mergeModel(apiModel, existing); + const apiModel = metadataById.get(modelId); + const manual = manualModelOverrides[modelId]; + const merged = apiModel !== undefined + ? mergeModel(apiModel, existing) + : mergeManualModel(manual, existing); const tomlContent = formatToml(merged); if (existing === null) { @@ -509,16 +730,29 @@ async function main() { console.log(""); } - const orphaned = [...existingFiles].filter((file) => !apiModelIds.has(file)); + const orphaned = [...existingFiles].filter((file) => !apiModelIds.has(file)).sort(); for (const file of orphaned) { - console.log(`Warning: Orphaned file (not in API): ${file}`); + const filePath = path.join(modelsDir, file); + if (newOnly) { + unchanged++; + console.log(`Skipping removal in new-only mode: ${file}`); + continue; + } + + deleted++; + if (dryRun) { + console.log(`[DRY RUN] Would remove inactive model: ${file}`); + } else { + await rm(filePath, { force: true }); + console.log(`Removed inactive model: ${file}`); + } } console.log(""); console.log( dryRun - ? `Summary: ${created} would be created, ${updated} would be updated, ${unchanged} unchanged, ${orphaned.length} orphaned` - : `Summary: ${created} created, ${updated} updated, ${unchanged} unchanged, ${orphaned.length} orphaned`, + ? `Summary: ${created} would be created, ${updated} would be updated, ${deleted} would be removed, ${unchanged} unchanged` + : `Summary: ${created} created, ${updated} updated, ${deleted} removed, ${unchanged} unchanged`, ); } diff --git a/providers/wandb/models/MiniMaxAI/MiniMax-M2.5.toml b/providers/wandb/models/MiniMaxAI/MiniMax-M2.5.toml index e454d4b73..c58bd6b44 100644 --- a/providers/wandb/models/MiniMaxAI/MiniMax-M2.5.toml +++ b/providers/wandb/models/MiniMaxAI/MiniMax-M2.5.toml @@ -1,9 +1,9 @@ name = "MiniMax M2.5" family = "minimax" release_date = "2026-02-12" -last_updated = "2026-03-12" +last_updated = "2026-05-19" attachment = false -reasoning = false +reasoning = true structured_output = true temperature = true tool_call = true @@ -12,6 +12,7 @@ open_weights = true [cost] input = 0.3 output = 1.2 +cache_read = 0.3 [limit] context = 196_608 diff --git a/providers/wandb/models/OpenPipe/Qwen3-14B-Instruct.toml b/providers/wandb/models/OpenPipe/Qwen3-14B-Instruct.toml index 9fc6bf78d..b491f80e8 100644 --- a/providers/wandb/models/OpenPipe/Qwen3-14B-Instruct.toml +++ b/providers/wandb/models/OpenPipe/Qwen3-14B-Instruct.toml @@ -1,7 +1,7 @@ name = "OpenPipe Qwen3 14B Instruct" family = "qwen" release_date = "2025-04-29" -last_updated = "2026-03-12" +last_updated = "2026-05-19" attachment = false reasoning = false structured_output = true @@ -12,6 +12,7 @@ open_weights = true [cost] input = 0.05 output = 0.22 +cache_read = 0.05 [limit] context = 32_768 diff --git a/providers/wandb/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml b/providers/wandb/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml index a5cd49bca..ce8e7f98e 100644 --- a/providers/wandb/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml +++ b/providers/wandb/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml @@ -1,7 +1,7 @@ name = "Qwen3 235B A22B Instruct 2507" family = "qwen" release_date = "2025-04-28" -last_updated = "2026-03-12" +last_updated = "2026-05-19" attachment = false reasoning = false structured_output = true @@ -13,6 +13,7 @@ open_weights = true [cost] input = 0.1 output = 0.1 +cache_read = 0.1 [limit] context = 262_144 diff --git a/providers/wandb/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml b/providers/wandb/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml index a64c78bf2..f46a81872 100644 --- a/providers/wandb/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml +++ b/providers/wandb/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml @@ -1,7 +1,7 @@ name = "Qwen3-235B-A22B-Thinking-2507" family = "qwen" release_date = "2025-07-25" -last_updated = "2026-03-12" +last_updated = "2026-05-19" attachment = false reasoning = true structured_output = true @@ -13,6 +13,7 @@ open_weights = true [cost] input = 0.1 output = 0.1 +cache_read = 0.1 [limit] context = 262_144 diff --git a/providers/wandb/models/Qwen/Qwen3-30B-A3B-Instruct-2507.toml b/providers/wandb/models/Qwen/Qwen3-30B-A3B-Instruct-2507.toml index e6004229d..6e39cb38f 100644 --- a/providers/wandb/models/Qwen/Qwen3-30B-A3B-Instruct-2507.toml +++ b/providers/wandb/models/Qwen/Qwen3-30B-A3B-Instruct-2507.toml @@ -1,7 +1,7 @@ name = "Qwen3 30B A3B Instruct 2507" family = "qwen" release_date = "2025-07-29" -last_updated = "2026-03-12" +last_updated = "2026-05-19" attachment = false reasoning = false structured_output = true @@ -12,6 +12,7 @@ open_weights = true [cost] input = 0.1 output = 0.3 +cache_read = 0.1 [limit] context = 262_144 diff --git a/providers/wandb/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml b/providers/wandb/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml index ead192d0e..9f6b195e2 100644 --- a/providers/wandb/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml +++ b/providers/wandb/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml @@ -1,7 +1,7 @@ name = "Qwen3-Coder-480B-A35B-Instruct" family = "qwen" release_date = "2025-07-23" -last_updated = "2026-03-12" +last_updated = "2026-05-19" attachment = false reasoning = false structured_output = true @@ -13,6 +13,7 @@ open_weights = true [cost] input = 1 output = 1.5 +cache_read = 1 [limit] context = 262_144 diff --git a/providers/wandb/models/Qwen/Qwen3.5-27B.toml b/providers/wandb/models/Qwen/Qwen3.5-27B.toml new file mode 100644 index 000000000..cb6872e19 --- /dev/null +++ b/providers/wandb/models/Qwen/Qwen3.5-27B.toml @@ -0,0 +1,23 @@ +name = "Qwen3.5 27B" +family = "qwen3.5" +release_date = "2026-02-24" +last_updated = "2026-05-19" +attachment = true +reasoning = true +structured_output = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.39 +output = 3.12 +cache_read = 0.08 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/wandb/models/Qwen/Qwen3.5-35B-A3B.toml b/providers/wandb/models/Qwen/Qwen3.5-35B-A3B.toml new file mode 100644 index 000000000..ab334a8a0 --- /dev/null +++ b/providers/wandb/models/Qwen/Qwen3.5-35B-A3B.toml @@ -0,0 +1,23 @@ +name = "Qwen3.5 35B A3B" +family = "qwen3.5" +release_date = "2026-02-24" +last_updated = "2026-05-19" +attachment = true +reasoning = true +structured_output = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.25 +output = 1.25 +cache_read = 0.25 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/wandb/models/Qwen/Qwen3.6-27B.toml b/providers/wandb/models/Qwen/Qwen3.6-27B.toml new file mode 100644 index 000000000..22188ec4b --- /dev/null +++ b/providers/wandb/models/Qwen/Qwen3.6-27B.toml @@ -0,0 +1,23 @@ +name = "Qwen3.6 27B" +family = "qwen3.6" +release_date = "2026-04-22" +last_updated = "2026-05-19" +attachment = true +reasoning = true +structured_output = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.6 +output = 3.6 +cache_read = 0.12 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/wandb/models/Qwen/Qwen3.6-35B-A3B.toml b/providers/wandb/models/Qwen/Qwen3.6-35B-A3B.toml new file mode 100644 index 000000000..aa43904c8 --- /dev/null +++ b/providers/wandb/models/Qwen/Qwen3.6-35B-A3B.toml @@ -0,0 +1,23 @@ +name = "Qwen3.6 35B A3B" +family = "qwen3.6" +release_date = "2026-04-15" +last_updated = "2026-05-19" +attachment = true +reasoning = true +structured_output = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.25 +output = 1.25 +cache_read = 0.25 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/wandb/models/deepseek-ai/DeepSeek-V3.1.toml b/providers/wandb/models/deepseek-ai/DeepSeek-V3.1.toml index f6f4592b3..12a56215f 100644 --- a/providers/wandb/models/deepseek-ai/DeepSeek-V3.1.toml +++ b/providers/wandb/models/deepseek-ai/DeepSeek-V3.1.toml @@ -1,7 +1,7 @@ name = "DeepSeek V3.1" family = "deepseek" release_date = "2025-08-21" -last_updated = "2026-03-12" +last_updated = "2026-05-19" attachment = false reasoning = false structured_output = true @@ -12,6 +12,7 @@ open_weights = true [cost] input = 0.55 output = 1.65 +cache_read = 0.55 [limit] context = 161_000 diff --git a/providers/wandb/models/deepseek-ai/DeepSeek-V4-Flash.toml b/providers/wandb/models/deepseek-ai/DeepSeek-V4-Flash.toml new file mode 100644 index 000000000..d104609cf --- /dev/null +++ b/providers/wandb/models/deepseek-ai/DeepSeek-V4-Flash.toml @@ -0,0 +1,23 @@ +name = "DeepSeek V4 Flash" +family = "deepseek" +release_date = "2026-04-24" +last_updated = "2026-05-19" +attachment = false +reasoning = false +structured_output = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.14 +output = 0.28 +cache_read = 0.07 + +[limit] +context = 1_048_576 +output = 1_048_576 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/wandb/models/deepseek-ai/DeepSeek-V4-Pro.toml b/providers/wandb/models/deepseek-ai/DeepSeek-V4-Pro.toml new file mode 100644 index 000000000..302ad9a04 --- /dev/null +++ b/providers/wandb/models/deepseek-ai/DeepSeek-V4-Pro.toml @@ -0,0 +1,23 @@ +name = "DeepSeek V4 Pro" +family = "deepseek" +release_date = "2026-04-24" +last_updated = "2026-05-19" +attachment = false +reasoning = false +structured_output = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 1.74 +output = 3.48 +cache_read = 0.14 + +[limit] +context = 1_048_576 +output = 1_048_576 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/wandb/models/google/gemma-4-31B-it.toml b/providers/wandb/models/google/gemma-4-31B-it.toml new file mode 100644 index 000000000..4a55841e3 --- /dev/null +++ b/providers/wandb/models/google/gemma-4-31B-it.toml @@ -0,0 +1,23 @@ +name = "Gemma 4 31B" +family = "gemma" +release_date = "2026-04-02" +last_updated = "2026-05-19" +attachment = true +reasoning = true +structured_output = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.3 +output = 1.25 +cache_read = 0.3 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/wandb/models/zai-org/GLM-5-FP8.toml b/providers/wandb/models/ibm-granite/granite-4.1-8b.toml similarity index 50% rename from providers/wandb/models/zai-org/GLM-5-FP8.toml rename to providers/wandb/models/ibm-granite/granite-4.1-8b.toml index e21252d17..19c772f38 100644 --- a/providers/wandb/models/zai-org/GLM-5-FP8.toml +++ b/providers/wandb/models/ibm-granite/granite-4.1-8b.toml @@ -1,7 +1,7 @@ -name = "GLM 5" -family = "glm" -release_date = "2026-02-11" -last_updated = "2026-03-12" +name = "Granite 4.1 8B" +family = "granite" +release_date = "2026-04-29" +last_updated = "2026-05-19" attachment = false reasoning = false structured_output = true @@ -10,12 +10,13 @@ tool_call = true open_weights = true [cost] -input = 1 -output = 3.2 +input = 0.05 +output = 0.1 +cache_read = 0.05 [limit] -context = 200_000 -output = 200_000 +context = 131_072 +output = 131_072 [modalities] input = ["text"] diff --git a/providers/wandb/models/meta-llama/Llama-3.1-70B-Instruct.toml b/providers/wandb/models/meta-llama/Llama-3.1-70B-Instruct.toml index 0d92de4c0..65a47eda7 100644 --- a/providers/wandb/models/meta-llama/Llama-3.1-70B-Instruct.toml +++ b/providers/wandb/models/meta-llama/Llama-3.1-70B-Instruct.toml @@ -1,7 +1,7 @@ name = "Llama 3.1 70B" family = "llama" release_date = "2024-07-23" -last_updated = "2026-03-12" +last_updated = "2026-05-19" attachment = false reasoning = false structured_output = true @@ -12,6 +12,7 @@ open_weights = true [cost] input = 0.8 output = 0.8 +cache_read = 0.8 [limit] context = 128_000 diff --git a/providers/wandb/models/meta-llama/Llama-3.1-8B-Instruct.toml b/providers/wandb/models/meta-llama/Llama-3.1-8B-Instruct.toml index 6df9cd97d..12e608e07 100644 --- a/providers/wandb/models/meta-llama/Llama-3.1-8B-Instruct.toml +++ b/providers/wandb/models/meta-llama/Llama-3.1-8B-Instruct.toml @@ -1,9 +1,9 @@ name = "Meta-Llama-3.1-8B-Instruct" family = "llama" release_date = "2024-07-23" -last_updated = "2026-03-12" +last_updated = "2026-05-19" attachment = false -reasoning = true +reasoning = false structured_output = true temperature = true tool_call = true @@ -13,6 +13,7 @@ open_weights = true [cost] input = 0.22 output = 0.22 +cache_read = 0.22 [limit] context = 128_000 diff --git a/providers/wandb/models/meta-llama/Llama-3.3-70B-Instruct.toml b/providers/wandb/models/meta-llama/Llama-3.3-70B-Instruct.toml index 613be288a..7db3a7da2 100644 --- a/providers/wandb/models/meta-llama/Llama-3.3-70B-Instruct.toml +++ b/providers/wandb/models/meta-llama/Llama-3.3-70B-Instruct.toml @@ -1,9 +1,9 @@ name = "Llama-3.3-70B-Instruct" family = "llama" release_date = "2024-12-06" -last_updated = "2026-03-12" +last_updated = "2026-05-19" attachment = false -reasoning = true +reasoning = false structured_output = true temperature = true tool_call = true @@ -13,6 +13,7 @@ open_weights = true [cost] input = 0.71 output = 0.71 +cache_read = 0.71 [limit] context = 128_000 diff --git a/providers/wandb/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml b/providers/wandb/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml index fd0885dd3..61c3ded54 100644 --- a/providers/wandb/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml +++ b/providers/wandb/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml @@ -1,13 +1,13 @@ name = "Llama 4 Scout 17B 16E Instruct" family = "llama" -release_date = "2025-01-31" -last_updated = "2026-03-12" -attachment = false -reasoning = true +release_date = "2025-04-05" +last_updated = "2026-05-19" +attachment = true +reasoning = false structured_output = true temperature = true tool_call = true -knowledge = "2024-12" +knowledge = "2024-08" open_weights = true [cost] diff --git a/providers/wandb/models/microsoft/Phi-4-mini-instruct.toml b/providers/wandb/models/microsoft/Phi-4-mini-instruct.toml index 6cc415f08..4f33bfac4 100644 --- a/providers/wandb/models/microsoft/Phi-4-mini-instruct.toml +++ b/providers/wandb/models/microsoft/Phi-4-mini-instruct.toml @@ -1,9 +1,9 @@ name = "Phi-4-mini-instruct" family = "phi" release_date = "2024-12-11" -last_updated = "2026-03-12" +last_updated = "2026-05-19" attachment = false -reasoning = true +reasoning = false structured_output = true temperature = true tool_call = true @@ -13,6 +13,7 @@ open_weights = true [cost] input = 0.08 output = 0.35 +cache_read = 0.08 [limit] context = 128_000 diff --git a/providers/wandb/models/moonshotai/Kimi-K2.5.toml b/providers/wandb/models/moonshotai/Kimi-K2.5.toml index e7ffb5cb3..2fde70961 100644 --- a/providers/wandb/models/moonshotai/Kimi-K2.5.toml +++ b/providers/wandb/models/moonshotai/Kimi-K2.5.toml @@ -1,7 +1,7 @@ name = "Kimi K2.5" family = "kimi" release_date = "2026-01-27" -last_updated = "2026-03-12" +last_updated = "2026-05-19" attachment = true reasoning = true structured_output = true @@ -9,6 +9,8 @@ temperature = true tool_call = true open_weights = true +interleaved = true + [cost] input = 0.5 output = 2.85 @@ -20,6 +22,3 @@ output = 262_144 [modalities] input = ["text", "image"] output = ["text"] - -[interleaved] -field = "reasoning_content" diff --git a/providers/wandb/models/moonshotai/Kimi-K2.6.toml b/providers/wandb/models/moonshotai/Kimi-K2.6.toml new file mode 100644 index 000000000..a9eb09a26 --- /dev/null +++ b/providers/wandb/models/moonshotai/Kimi-K2.6.toml @@ -0,0 +1,23 @@ +name = "Kimi K2.6" +family = "kimi-k2.6" +release_date = "2026-04-20" +last_updated = "2026-05-19" +attachment = true +reasoning = true +structured_output = true +temperature = true +tool_call = true +open_weights = true + +[cost] +input = 0.95 +output = 4 +cache_read = 0.16 + +[limit] +context = 262_144 +output = 262_144 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/wandb/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8.toml b/providers/wandb/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8.toml index bdaef3f8c..001d64162 100644 --- a/providers/wandb/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8.toml +++ b/providers/wandb/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8.toml @@ -1,9 +1,9 @@ name = "NVIDIA Nemotron 3 Super 120B" family = "nemotron" release_date = "2026-03-11" -last_updated = "2026-03-12" +last_updated = "2026-05-19" attachment = false -reasoning = false +reasoning = true structured_output = true temperature = true tool_call = true @@ -12,6 +12,7 @@ open_weights = true [cost] input = 0.2 output = 0.8 +cache_read = 0.2 [limit] context = 262_144 diff --git a/providers/wandb/models/openai/gpt-oss-120b.toml b/providers/wandb/models/openai/gpt-oss-120b.toml index c87249838..f4f5fca7b 100644 --- a/providers/wandb/models/openai/gpt-oss-120b.toml +++ b/providers/wandb/models/openai/gpt-oss-120b.toml @@ -1,17 +1,18 @@ name = "gpt-oss-120b" family = "gpt-oss" release_date = "2025-08-05" -last_updated = "2026-03-12" +last_updated = "2026-05-19" attachment = false -reasoning = false +reasoning = true structured_output = true temperature = true tool_call = true -open_weights = false +open_weights = true [cost] input = 0.15 output = 0.6 +cache_read = 0.15 [limit] context = 131_072 diff --git a/providers/wandb/models/openai/gpt-oss-20b.toml b/providers/wandb/models/openai/gpt-oss-20b.toml index 3768e3a41..e7bbd8be3 100644 --- a/providers/wandb/models/openai/gpt-oss-20b.toml +++ b/providers/wandb/models/openai/gpt-oss-20b.toml @@ -1,17 +1,18 @@ name = "gpt-oss-20b" family = "gpt-oss" release_date = "2025-08-05" -last_updated = "2026-03-12" +last_updated = "2026-05-19" attachment = false -reasoning = false +reasoning = true structured_output = true temperature = true tool_call = true -open_weights = false +open_weights = true [cost] input = 0.05 output = 0.2 +cache_read = 0.05 [limit] context = 131_072 diff --git a/providers/wandb/models/zai-org/GLM-5.1.toml b/providers/wandb/models/zai-org/GLM-5.1.toml index d79f6acb9..72a69a42f 100644 --- a/providers/wandb/models/zai-org/GLM-5.1.toml +++ b/providers/wandb/models/zai-org/GLM-5.1.toml @@ -1,2 +1,26 @@ -[extends] -from = "zai/glm-5.1" \ No newline at end of file +name = "GLM-5.1" +family = "glm" +release_date = "2026-03-27" +last_updated = "2026-05-19" +attachment = false +reasoning = true +structured_output = true +temperature = true +tool_call = true +open_weights = false + +interleaved = true + +[cost] +input = 1.4 +output = 4.4 +cache_read = 0.26 +cache_write = 0 + +[limit] +context = 200_000 +output = 131_072 + +[modalities] +input = ["text"] +output = ["text"]