From e56324e58af8d15efc818d6d37c33cc999137635 Mon Sep 17 00:00:00 2001
From: Ace Eldeib <aeldeib@coreweave.com>
Date: Tue, 19 May 2026 16:36:24 +0200
Subject: [PATCH] Update W&B active model catalog

Use the authenticated W&B /v1/models endpoint as the source of the active model set while keeping Artificial Analysis (AA) as the metadata source. Add explicit probe/manual overrides for active models missing AA metadata, remove TOML files for models that are no longer active (skipped with --new-only), and refresh the W&B TOMLs to match the 26 enabled models.
---
 packages/core/script/generate-wandb.ts        | 288 ++++++++++++++++--
 .../wandb/models/MiniMaxAI/MiniMax-M2.5.toml  |   5 +-
 .../models/OpenPipe/Qwen3-14B-Instruct.toml   |   3 +-
 .../Qwen/Qwen3-235B-A22B-Instruct-2507.toml   |   3 +-
 .../Qwen/Qwen3-235B-A22B-Thinking-2507.toml   |   3 +-
 .../Qwen/Qwen3-30B-A3B-Instruct-2507.toml     |   3 +-
 .../Qwen/Qwen3-Coder-480B-A35B-Instruct.toml  |   3 +-
 providers/wandb/models/Qwen/Qwen3.5-27B.toml  |  23 ++
 .../wandb/models/Qwen/Qwen3.5-35B-A3B.toml    |  23 ++
 providers/wandb/models/Qwen/Qwen3.6-27B.toml  |  23 ++
 .../wandb/models/Qwen/Qwen3.6-35B-A3B.toml    |  23 ++
 .../models/deepseek-ai/DeepSeek-V3.1.toml     |   3 +-
 .../models/deepseek-ai/DeepSeek-V4-Flash.toml |  23 ++
 .../models/deepseek-ai/DeepSeek-V4-Pro.toml   |  23 ++
 .../wandb/models/google/gemma-4-31B-it.toml   |  23 ++
 .../granite-4.1-8b.toml}                      |  17 +-
 .../meta-llama/Llama-3.1-70B-Instruct.toml    |   3 +-
 .../meta-llama/Llama-3.1-8B-Instruct.toml     |   5 +-
 .../meta-llama/Llama-3.3-70B-Instruct.toml    |   5 +-
 .../Llama-4-Scout-17B-16E-Instruct.toml       |  10 +-
 .../models/microsoft/Phi-4-mini-instruct.toml |   5 +-
 .../wandb/models/moonshotai/Kimi-K2.5.toml    |   7 +-
 .../wandb/models/moonshotai/Kimi-K2.6.toml    |  23 ++
 ...NVIDIA-Nemotron-3-Super-120B-A12B-FP8.toml |   5 +-
 .../wandb/models/openai/gpt-oss-120b.toml     |   7 +-
 .../wandb/models/openai/gpt-oss-20b.toml      |   7 +-
 providers/wandb/models/zai-org/GLM-5.1.toml   |  28 +-
 27 files changed, 525 insertions(+), 69 deletions(-)
 create mode 100644 providers/wandb/models/Qwen/Qwen3.5-27B.toml
 create mode 100644 providers/wandb/models/Qwen/Qwen3.5-35B-A3B.toml
 create mode 100644 providers/wandb/models/Qwen/Qwen3.6-27B.toml
 create mode 100644 providers/wandb/models/Qwen/Qwen3.6-35B-A3B.toml
 create mode 100644 providers/wandb/models/deepseek-ai/DeepSeek-V4-Flash.toml
 create mode 100644 providers/wandb/models/deepseek-ai/DeepSeek-V4-Pro.toml
 create mode 100644 providers/wandb/models/google/gemma-4-31B-it.toml
 rename providers/wandb/models/{zai-org/GLM-5-FP8.toml => ibm-granite/granite-4.1-8b.toml} (50%)
 create mode 100644 providers/wandb/models/moonshotai/Kimi-K2.6.toml

diff --git a/packages/core/script/generate-wandb.ts b/packages/core/script/generate-wandb.ts
index 385f235c2..d8c548e72 100644
--- a/packages/core/script/generate-wandb.ts
+++ b/packages/core/script/generate-wandb.ts
@@ -1,11 +1,12 @@
 #!/usr/bin/env bun
 
 import path from "node:path";
-import { mkdir } from "node:fs/promises";
+import { mkdir, rm } from "node:fs/promises";
 import { z } from "zod";
 import { ModelFamilyValues } from "../src/family.js";
 
-const API_ENDPOINT = "https://trace.wandb.ai/inference/analysis/artificialanalysis/models";
+const ACTIVE_MODELS_ENDPOINT = "https://api.inference.wandb.ai/v1/models";
+const METADATA_ENDPOINT = "https://trace.wandb.ai/inference/analysis/artificialanalysis/models";
 
 const Pricing = z
   .object({
@@ -13,6 +14,8 @@ const Pricing = z
     completion: z.string().optional(),
     image: z.string().optional(),
     request: z.string().optional(),
+    input_cache_read: z.string().optional(),
+    input_cache_write: z.string().optional(),
     input_cache_reads: z.string().optional(),
     input_cache_writes: z.string().optional(),
   })
@@ -39,6 +42,12 @@ const WandbResponse = z
   })
   .strict();
 
+const ActiveModelsResponse = z
+  .object({
+    data: z.array(z.object({ id: z.string() }).passthrough()),
+  })
+  .passthrough();
+
 interface ExistingModel {
   name?: string;
   family?: string;
@@ -100,6 +109,10 @@ interface MergedModel {
   };
 }
 
+type ManualModel = Omit<MergedModel, "last_updated"> & {
+  last_updated?: string;
+};
+
 interface Changes {
   field: string;
   oldValue: string;
@@ -120,16 +133,140 @@ const modalityMap: Record<string, SupportedModality | undefined> = {
 
 const openWeightsPrefixes = new Set([
   "deepseek-ai/",
+  "google/",
+  "ibm-granite/",
   "meta-llama/",
   "microsoft/",
   "MiniMaxAI/",
   "moonshotai/",
   "nvidia/",
   "OpenPipe/",
+  "openai/gpt-oss-",
   "Qwen/",
   "zai-org/",
 ]);
 
+// W&B's Artificial Analysis metadata feed does not expose reasoning support.
+// These IDs were verified on 2026-05-19 with live /v1/chat/completions probes
+// against an authenticated W&B Inference project: the response message included
+// a non-empty `reasoning` string for a basic text prompt.
+const probedReasoningModelIds = new Set([
+  "MiniMaxAI/MiniMax-M2.5",
+  "Qwen/Qwen3-235B-A22B-Thinking-2507",
+  "Qwen/Qwen3.5-27B",
+  "Qwen/Qwen3.5-35B-A3B",
+  "Qwen/Qwen3.6-27B",
+  "Qwen/Qwen3.6-35B-A3B",
+  "google/gemma-4-31B-it",
+  "moonshotai/Kimi-K2.5",
+  "moonshotai/Kimi-K2.6",
+  "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8",
+  "openai/gpt-oss-120b",
+  "openai/gpt-oss-20b",
+  "zai-org/GLM-5.1",
+]);
+
+// These IDs were probed in the same run and returned no reasoning payload
+// (`message.reasoning` was null) for the basic text prompt.
+const probedNoReasoningModelIds = new Set([
+  "OpenPipe/Qwen3-14B-Instruct",
+  "Qwen/Qwen3-235B-A22B-Instruct-2507",
+  "Qwen/Qwen3-30B-A3B-Instruct-2507",
+  "Qwen/Qwen3-Coder-480B-A35B-Instruct",
+  "deepseek-ai/DeepSeek-V3.1",
+  "deepseek-ai/DeepSeek-V4-Flash",
+  "deepseek-ai/DeepSeek-V4-Pro",
+  "ibm-granite/granite-4.1-8b",
+  "meta-llama/Llama-3.1-70B-Instruct",
+  "meta-llama/Llama-3.1-8B-Instruct",
+  "meta-llama/Llama-3.3-70B-Instruct",
+  "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+  "microsoft/Phi-4-mini-instruct",
+]);
+
+// Active /v1 models that are missing from the W&B Artificial Analysis metadata
+// feed. Functionality flags here were verified on 2026-05-19 with live
+// /v1/chat/completions probes. Pricing, limits, release dates, and knowledge
+// cutoffs are manually sourced from existing models.dev entries for the same
+// model/provider family and preserved here so the generator can cover the full
+// active /v1/models list without inventing metadata.
+const manualModelOverrides: Record<string, ManualModel> = {
+  "meta-llama/Llama-4-Scout-17B-16E-Instruct": {
+    name: "Llama 4 Scout 17B 16E Instruct",
+    family: "llama",
+    release_date: "2025-04-05",
+    attachment: true,
+    reasoning: false,
+    structured_output: true,
+    temperature: true,
+    tool_call: true,
+    knowledge: "2024-08",
+    open_weights: true,
+    cost: {
+      input: 0.17,
+      output: 0.66,
+    },
+    limit: {
+      context: 64_000,
+      output: 64_000,
+    },
+    modalities: {
+      input: ["text", "image"],
+      output: ["text"],
+    },
+  },
+  "moonshotai/Kimi-K2.5": {
+    name: "Kimi K2.5",
+    family: "kimi",
+    release_date: "2026-01-27",
+    attachment: true,
+    reasoning: true,
+    structured_output: true,
+    temperature: true,
+    tool_call: true,
+    open_weights: true,
+    interleaved: true,
+    cost: {
+      input: 0.5,
+      output: 2.85,
+    },
+    limit: {
+      context: 262_144,
+      output: 262_144,
+    },
+    modalities: {
+      input: ["text", "image"],
+      output: ["text"],
+    },
+  },
+  "zai-org/GLM-5.1": {
+    name: "GLM-5.1",
+    family: "glm",
+    release_date: "2026-03-27",
+    attachment: false,
+    reasoning: true,
+    structured_output: true,
+    temperature: true,
+    tool_call: true,
+    open_weights: false,
+    interleaved: true,
+    cost: {
+      input: 1.4,
+      output: 4.4,
+      cache_read: 0.26,
+      cache_write: 0,
+    },
+    limit: {
+      context: 200_000,
+      output: 131_072,
+    },
+    modalities: {
+      input: ["text"],
+      output: ["text"],
+    },
+  },
+};
+
 function timestampToDate(timestamp: number): string {
   return new Date(timestamp * 1000).toISOString().slice(0, 10);
 }
@@ -195,10 +332,27 @@ function normalizeName(apiModel: z.infer<typeof WandbModel>): string {
 }
 
 function inferReasoning(apiModel: z.infer<typeof WandbModel>): boolean {
+  const verified = verifiedReasoning(apiModel.id);
+  if (verified !== undefined) {
+    return verified;
+  }
+
   const text = `${apiModel.id} ${apiModel.name}`.toLowerCase();
   return text.includes("thinking") || /\br1\b/.test(text) || text.includes("reasoning");
 }
 
+function verifiedReasoning(modelId: string): boolean | undefined {
+  if (probedReasoningModelIds.has(modelId)) {
+    return true;
+  }
+
+  if (probedNoReasoningModelIds.has(modelId)) {
+    return false;
+  }
+
+  return undefined;
+}
+
 function inferOpenWeights(modelId: string): boolean {
   for (const prefix of openWeightsPrefixes) {
     if (modelId.startsWith(prefix)) {
@@ -245,12 +399,12 @@ function mergeModel(
     name: existing?.name ?? normalizeName(apiModel),
     family: existing?.family ?? inferFamily(apiModel.id, apiModel.name),
     attachment: existing?.attachment ?? inputModalities.some((m) => m !== "text"),
-    reasoning: existing?.reasoning ?? inferReasoning(apiModel),
+    reasoning: verifiedReasoning(apiModel.id) ?? existing?.reasoning ?? inferReasoning(apiModel),
     tool_call: existing?.tool_call ?? featureSet.has("tools"),
     temperature: existing?.temperature ?? samplingSet.has("temperature"),
     release_date: existing?.release_date ?? timestampToDate(apiModel.created),
     last_updated: getTodayDate(),
-    open_weights: existing?.open_weights ?? inferOpenWeights(apiModel.id),
+    open_weights: inferOpenWeights(apiModel.id) || (existing?.open_weights ?? false),
     ...(existing?.structured_output !== undefined
       ? { structured_output: existing.structured_output }
       : featureSet.has("structured_outputs")
@@ -277,8 +431,8 @@ function mergeModel(
 
   const prompt = apiModel.pricing?.prompt;
   const completion = apiModel.pricing?.completion;
-  const cacheRead = apiModel.pricing?.input_cache_reads;
-  const cacheWrite = apiModel.pricing?.input_cache_writes;
+  const cacheRead = apiModel.pricing?.input_cache_read ?? apiModel.pricing?.input_cache_reads;
+  const cacheWrite = apiModel.pricing?.input_cache_write ?? apiModel.pricing?.input_cache_writes;
 
   if (prompt && completion) {
     merged.cost = {
@@ -303,6 +457,14 @@ function mergeModel(
   return merged;
 }
 
+function mergeManualModel(manual: ManualModel, existing: ExistingModel | null): MergedModel {
+  return {
+    ...manual,
+    last_updated: getTodayDate(),
+    ...(existing?.status ? { status: existing.status } : {}),
+  };
+}
+
 function formatToml(model: MergedModel): string {
   const lines: string[] = [];
 
@@ -406,7 +568,10 @@ function detectChanges(existing: ExistingModel | null, merged: MergedModel): Cha
   compare("structured_output", existing.structured_output, merged.structured_output);
   compare("temperature", existing.temperature, merged.temperature);
   compare("tool_call", existing.tool_call, merged.tool_call);
+  compare("knowledge", existing.knowledge, merged.knowledge);
   compare("open_weights", existing.open_weights, merged.open_weights);
+  compare("interleaved", existing.interleaved, merged.interleaved);
+  compare("status", existing.status, merged.status);
   compare("cost.input", existing.cost?.input, merged.cost?.input);
   compare("cost.output", existing.cost?.output, merged.cost?.output);
   compare("cost.cache_read", existing.cost?.cache_read, merged.cost?.cache_read);
@@ -419,51 +584,107 @@ function detectChanges(existing: ExistingModel | null, merged: MergedModel): Cha
   return changes;
 }
 
-async function main() {
-  const args = process.argv.slice(2);
-  const dryRun = args.includes("--dry-run");
-  const newOnly = args.includes("--new-only");
+async function fetchMetadataModels(): Promise<Array<z.infer<typeof WandbModel>>> {
+  const res = await fetch(METADATA_ENDPOINT);
+  if (!res.ok) {
+    throw new Error(`Failed to fetch W&B metadata API: ${res.status} ${res.statusText}`);
+  }
 
-  const modelsDir = path.join(import.meta.dirname, "..", "..", "..", "providers", "wandb", "models");
+  const json = await res.json();
+  const parsed = WandbResponse.safeParse(json);
+  if (!parsed.success) {
+    parsed.error.cause = { endpoint: METADATA_ENDPOINT };
+    throw parsed.error;
+  }
+
+  return parsed.data.data;
+}
+
+async function fetchActiveModelIds(): Promise<string[]> {
+  const apiKey = process.env.WANDB_API_KEY;
+  const project = process.env.WANDB_INFERENCE_PROJECT ?? process.env.OPENAI_PROJECT;
 
-  console.log(`${dryRun ? "[DRY RUN] " : ""}${newOnly ? "[NEW ONLY] " : ""}Fetching WandB models from API...`);
+  if (!apiKey || !project) {
+    throw new Error(
+      "W&B active model sync requires WANDB_API_KEY and WANDB_INFERENCE_PROJECT (or OPENAI_PROJECT).",
+    );
+  }
+
+  const res = await fetch(ACTIVE_MODELS_ENDPOINT, {
+    headers: {
+      "Content-Type": "application/json",
+      "Authorization": `Bearer ${apiKey}`,
+      "OpenAI-Project": project,
+    },
+  });
 
-  const res = await fetch(API_ENDPOINT);
   if (!res.ok) {
-    console.error(`Failed to fetch API: ${res.status} ${res.statusText}`);
-    process.exit(1);
+    throw new Error(`Failed to fetch W&B active models API: ${res.status} ${res.statusText}`);
   }
 
   const json = await res.json();
-  const parsed = WandbResponse.safeParse(json);
+  const parsed = ActiveModelsResponse.safeParse(json);
   if (!parsed.success) {
-    console.error("Invalid API response:", parsed.error.errors);
-    process.exit(1);
+    parsed.error.cause = { endpoint: ACTIVE_MODELS_ENDPOINT };
+    throw parsed.error;
+  }
+
+  return [...new Set(parsed.data.data.map((model) => model.id))].sort();
+}
+
+async function main() {
+  const args = process.argv.slice(2);
+  const dryRun = args.includes("--dry-run");
+  const newOnly = args.includes("--new-only");
+
+  const modelsDir = path.join(import.meta.dirname, "..", "..", "..", "providers", "wandb", "models");
+
+  console.log(`${dryRun ? "[DRY RUN] " : ""}${newOnly ? "[NEW ONLY] " : ""}Fetching W&B active model list and metadata...`);
+
+  const [activeModelIds, metadataModels] = await Promise.all([
+    fetchActiveModelIds(),
+    fetchMetadataModels(),
+  ]);
+  const metadataById = new Map(metadataModels.map((model) => [model.id, model]));
+  const missingMetadata = activeModelIds.filter((id) => (
+    !metadataById.has(id) && !(id in manualModelOverrides)
+  ));
+
+  if (missingMetadata.length > 0) {
+    throw new Error(
+      `Active W&B models missing metadata or manual overrides: ${missingMetadata.join(", ")}`,
+    );
   }
 
-  const apiModels = parsed.data.data;
   const existingFiles = new Set<string>();
 
   for await (const file of new Bun.Glob("**/*.toml").scan({ cwd: modelsDir, absolute: false })) {
     existingFiles.add(file);
   }
 
-  console.log(`Found ${apiModels.length} models in API, ${existingFiles.size} existing files\n`);
+  console.log(
+    `Found ${activeModelIds.length} active models, ${metadataModels.length} metadata models, ${existingFiles.size} existing files\n`,
+  );
 
   const apiModelIds = new Set<string>();
   let created = 0;
   let updated = 0;
+  let deleted = 0;
   let unchanged = 0;
 
-  for (const apiModel of apiModels) {
-    const relativePath = `${apiModel.id}.toml`;
+  for (const modelId of activeModelIds) {
+    const relativePath = `${modelId}.toml`;
     const filePath = path.join(modelsDir, relativePath);
     const dirPath = path.dirname(filePath);
 
     apiModelIds.add(relativePath);
 
     const existing = await loadExistingModel(filePath);
-    const merged = mergeModel(apiModel, existing);
+    const apiModel = metadataById.get(modelId);
+    const manual = manualModelOverrides[modelId];
+    const merged = apiModel !== undefined
+      ? mergeModel(apiModel, existing)
+      : mergeManualModel(manual, existing);
     const tomlContent = formatToml(merged);
 
     if (existing === null) {
@@ -509,16 +730,29 @@ async function main() {
     console.log("");
   }
 
-  const orphaned = [...existingFiles].filter((file) => !apiModelIds.has(file));
+  const orphaned = [...existingFiles].filter((file) => !apiModelIds.has(file)).sort();
   for (const file of orphaned) {
-    console.log(`Warning: Orphaned file (not in API): ${file}`);
+    const filePath = path.join(modelsDir, file);
+    if (newOnly) {
+      unchanged++;
+      console.log(`Skipping removal in new-only mode: ${file}`);
+      continue;
+    }
+
+    deleted++;
+    if (dryRun) {
+      console.log(`[DRY RUN] Would remove inactive model: ${file}`);
+    } else {
+      await rm(filePath, { force: true });
+      console.log(`Removed inactive model: ${file}`);
+    }
   }
 
   console.log("");
   console.log(
     dryRun
-      ? `Summary: ${created} would be created, ${updated} would be updated, ${unchanged} unchanged, ${orphaned.length} orphaned`
-      : `Summary: ${created} created, ${updated} updated, ${unchanged} unchanged, ${orphaned.length} orphaned`,
+      ? `Summary: ${created} would be created, ${updated} would be updated, ${deleted} would be removed, ${unchanged} unchanged`
+      : `Summary: ${created} created, ${updated} updated, ${deleted} removed, ${unchanged} unchanged`,
   );
 }
 
diff --git a/providers/wandb/models/MiniMaxAI/MiniMax-M2.5.toml b/providers/wandb/models/MiniMaxAI/MiniMax-M2.5.toml
index e454d4b73..c58bd6b44 100644
--- a/providers/wandb/models/MiniMaxAI/MiniMax-M2.5.toml
+++ b/providers/wandb/models/MiniMaxAI/MiniMax-M2.5.toml
@@ -1,9 +1,9 @@
 name = "MiniMax M2.5"
 family = "minimax"
 release_date = "2026-02-12"
-last_updated = "2026-03-12"
+last_updated = "2026-05-19"
 attachment = false
-reasoning = false
+reasoning = true
 structured_output = true
 temperature = true
 tool_call = true
@@ -12,6 +12,7 @@ open_weights = true
 [cost]
 input = 0.3
 output = 1.2
+cache_read = 0.3
 
 [limit]
 context = 196_608
diff --git a/providers/wandb/models/OpenPipe/Qwen3-14B-Instruct.toml b/providers/wandb/models/OpenPipe/Qwen3-14B-Instruct.toml
index 9fc6bf78d..b491f80e8 100644
--- a/providers/wandb/models/OpenPipe/Qwen3-14B-Instruct.toml
+++ b/providers/wandb/models/OpenPipe/Qwen3-14B-Instruct.toml
@@ -1,7 +1,7 @@
 name = "OpenPipe Qwen3 14B Instruct"
 family = "qwen"
 release_date = "2025-04-29"
-last_updated = "2026-03-12"
+last_updated = "2026-05-19"
 attachment = false
 reasoning = false
 structured_output = true
@@ -12,6 +12,7 @@ open_weights = true
 [cost]
 input = 0.05
 output = 0.22
+cache_read = 0.05
 
 [limit]
 context = 32_768
diff --git a/providers/wandb/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml b/providers/wandb/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml
index a5cd49bca..ce8e7f98e 100644
--- a/providers/wandb/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml
+++ b/providers/wandb/models/Qwen/Qwen3-235B-A22B-Instruct-2507.toml
@@ -1,7 +1,7 @@
 name = "Qwen3 235B A22B Instruct 2507"
 family = "qwen"
 release_date = "2025-04-28"
-last_updated = "2026-03-12"
+last_updated = "2026-05-19"
 attachment = false
 reasoning = false
 structured_output = true
@@ -13,6 +13,7 @@ open_weights = true
 [cost]
 input = 0.1
 output = 0.1
+cache_read = 0.1
 
 [limit]
 context = 262_144
diff --git a/providers/wandb/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml b/providers/wandb/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml
index a64c78bf2..f46a81872 100644
--- a/providers/wandb/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml
+++ b/providers/wandb/models/Qwen/Qwen3-235B-A22B-Thinking-2507.toml
@@ -1,7 +1,7 @@
 name = "Qwen3-235B-A22B-Thinking-2507"
 family = "qwen"
 release_date = "2025-07-25"
-last_updated = "2026-03-12"
+last_updated = "2026-05-19"
 attachment = false
 reasoning = true
 structured_output = true
@@ -13,6 +13,7 @@ open_weights = true
 [cost]
 input = 0.1
 output = 0.1
+cache_read = 0.1
 
 [limit]
 context = 262_144
diff --git a/providers/wandb/models/Qwen/Qwen3-30B-A3B-Instruct-2507.toml b/providers/wandb/models/Qwen/Qwen3-30B-A3B-Instruct-2507.toml
index e6004229d..6e39cb38f 100644
--- a/providers/wandb/models/Qwen/Qwen3-30B-A3B-Instruct-2507.toml
+++ b/providers/wandb/models/Qwen/Qwen3-30B-A3B-Instruct-2507.toml
@@ -1,7 +1,7 @@
 name = "Qwen3 30B A3B Instruct 2507"
 family = "qwen"
 release_date = "2025-07-29"
-last_updated = "2026-03-12"
+last_updated = "2026-05-19"
 attachment = false
 reasoning = false
 structured_output = true
@@ -12,6 +12,7 @@ open_weights = true
 [cost]
 input = 0.1
 output = 0.3
+cache_read = 0.1
 
 [limit]
 context = 262_144
diff --git a/providers/wandb/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml b/providers/wandb/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml
index ead192d0e..9f6b195e2 100644
--- a/providers/wandb/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml
+++ b/providers/wandb/models/Qwen/Qwen3-Coder-480B-A35B-Instruct.toml
@@ -1,7 +1,7 @@
 name = "Qwen3-Coder-480B-A35B-Instruct"
 family = "qwen"
 release_date = "2025-07-23"
-last_updated = "2026-03-12"
+last_updated = "2026-05-19"
 attachment = false
 reasoning = false
 structured_output = true
@@ -13,6 +13,7 @@ open_weights = true
 [cost]
 input = 1
 output = 1.5
+cache_read = 1
 
 [limit]
 context = 262_144
diff --git a/providers/wandb/models/Qwen/Qwen3.5-27B.toml b/providers/wandb/models/Qwen/Qwen3.5-27B.toml
new file mode 100644
index 000000000..cb6872e19
--- /dev/null
+++ b/providers/wandb/models/Qwen/Qwen3.5-27B.toml
@@ -0,0 +1,23 @@
+name = "Qwen3.5 27B"
+family = "qwen3.5"
+release_date = "2026-02-24"
+last_updated = "2026-05-19"
+attachment = true
+reasoning = true
+structured_output = true
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.39
+output = 3.12
+cache_read = 0.08
+
+[limit]
+context = 262_144
+output = 262_144
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
diff --git a/providers/wandb/models/Qwen/Qwen3.5-35B-A3B.toml b/providers/wandb/models/Qwen/Qwen3.5-35B-A3B.toml
new file mode 100644
index 000000000..ab334a8a0
--- /dev/null
+++ b/providers/wandb/models/Qwen/Qwen3.5-35B-A3B.toml
@@ -0,0 +1,23 @@
+name = "Qwen3.5 35B A3B"
+family = "qwen3.5"
+release_date = "2026-02-24"
+last_updated = "2026-05-19"
+attachment = true
+reasoning = true
+structured_output = true
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.25
+output = 1.25
+cache_read = 0.25
+
+[limit]
+context = 262_144
+output = 262_144
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
diff --git a/providers/wandb/models/Qwen/Qwen3.6-27B.toml b/providers/wandb/models/Qwen/Qwen3.6-27B.toml
new file mode 100644
index 000000000..22188ec4b
--- /dev/null
+++ b/providers/wandb/models/Qwen/Qwen3.6-27B.toml
@@ -0,0 +1,23 @@
+name = "Qwen3.6 27B"
+family = "qwen3.6"
+release_date = "2026-04-22"
+last_updated = "2026-05-19"
+attachment = true
+reasoning = true
+structured_output = true
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.6
+output = 3.6
+cache_read = 0.12
+
+[limit]
+context = 262_144
+output = 262_144
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
diff --git a/providers/wandb/models/Qwen/Qwen3.6-35B-A3B.toml b/providers/wandb/models/Qwen/Qwen3.6-35B-A3B.toml
new file mode 100644
index 000000000..aa43904c8
--- /dev/null
+++ b/providers/wandb/models/Qwen/Qwen3.6-35B-A3B.toml
@@ -0,0 +1,23 @@
+name = "Qwen3.6 35B A3B"
+family = "qwen3.6"
+release_date = "2026-04-15"
+last_updated = "2026-05-19"
+attachment = true
+reasoning = true
+structured_output = true
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.25
+output = 1.25
+cache_read = 0.25
+
+[limit]
+context = 262_144
+output = 262_144
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
diff --git a/providers/wandb/models/deepseek-ai/DeepSeek-V3.1.toml b/providers/wandb/models/deepseek-ai/DeepSeek-V3.1.toml
index f6f4592b3..12a56215f 100644
--- a/providers/wandb/models/deepseek-ai/DeepSeek-V3.1.toml
+++ b/providers/wandb/models/deepseek-ai/DeepSeek-V3.1.toml
@@ -1,7 +1,7 @@
 name = "DeepSeek V3.1"
 family = "deepseek"
 release_date = "2025-08-21"
-last_updated = "2026-03-12"
+last_updated = "2026-05-19"
 attachment = false
 reasoning = false
 structured_output = true
@@ -12,6 +12,7 @@ open_weights = true
 [cost]
 input = 0.55
 output = 1.65
+cache_read = 0.55
 
 [limit]
 context = 161_000
diff --git a/providers/wandb/models/deepseek-ai/DeepSeek-V4-Flash.toml b/providers/wandb/models/deepseek-ai/DeepSeek-V4-Flash.toml
new file mode 100644
index 000000000..d104609cf
--- /dev/null
+++ b/providers/wandb/models/deepseek-ai/DeepSeek-V4-Flash.toml
@@ -0,0 +1,23 @@
+name = "DeepSeek V4 Flash"
+family = "deepseek"
+release_date = "2026-04-24"
+last_updated = "2026-05-19"
+attachment = false
+reasoning = false
+structured_output = true
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.14
+output = 0.28
+cache_read = 0.07
+
+[limit]
+context = 1_048_576
+output = 1_048_576
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/wandb/models/deepseek-ai/DeepSeek-V4-Pro.toml b/providers/wandb/models/deepseek-ai/DeepSeek-V4-Pro.toml
new file mode 100644
index 000000000..302ad9a04
--- /dev/null
+++ b/providers/wandb/models/deepseek-ai/DeepSeek-V4-Pro.toml
@@ -0,0 +1,23 @@
+name = "DeepSeek V4 Pro"
+family = "deepseek"
+release_date = "2026-04-24"
+last_updated = "2026-05-19"
+attachment = false
+reasoning = false
+structured_output = true
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 1.74
+output = 3.48
+cache_read = 0.14
+
+[limit]
+context = 1_048_576
+output = 1_048_576
+
+[modalities]
+input = ["text"]
+output = ["text"]
diff --git a/providers/wandb/models/google/gemma-4-31B-it.toml b/providers/wandb/models/google/gemma-4-31B-it.toml
new file mode 100644
index 000000000..4a55841e3
--- /dev/null
+++ b/providers/wandb/models/google/gemma-4-31B-it.toml
@@ -0,0 +1,23 @@
+name = "Gemma 4 31B"
+family = "gemma"
+release_date = "2026-04-02"
+last_updated = "2026-05-19"
+attachment = true
+reasoning = true
+structured_output = true
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.3
+output = 1.25
+cache_read = 0.3
+
+[limit]
+context = 262_144
+output = 262_144
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
diff --git a/providers/wandb/models/zai-org/GLM-5-FP8.toml b/providers/wandb/models/ibm-granite/granite-4.1-8b.toml
similarity index 50%
rename from providers/wandb/models/zai-org/GLM-5-FP8.toml
rename to providers/wandb/models/ibm-granite/granite-4.1-8b.toml
index e21252d17..19c772f38 100644
--- a/providers/wandb/models/zai-org/GLM-5-FP8.toml
+++ b/providers/wandb/models/ibm-granite/granite-4.1-8b.toml
@@ -1,7 +1,7 @@
-name = "GLM 5"
-family = "glm"
-release_date = "2026-02-11"
-last_updated = "2026-03-12"
+name = "Granite 4.1 8B"
+family = "granite"
+release_date = "2026-04-29"
+last_updated = "2026-05-19"
 attachment = false
 reasoning = false
 structured_output = true
@@ -10,12 +10,13 @@ tool_call = true
 open_weights = true
 
 [cost]
-input = 1
-output = 3.2
+input = 0.05
+output = 0.1
+cache_read = 0.05
 
 [limit]
-context = 200_000
-output = 200_000
+context = 131_072
+output = 131_072
 
 [modalities]
 input = ["text"]
diff --git a/providers/wandb/models/meta-llama/Llama-3.1-70B-Instruct.toml b/providers/wandb/models/meta-llama/Llama-3.1-70B-Instruct.toml
index 0d92de4c0..65a47eda7 100644
--- a/providers/wandb/models/meta-llama/Llama-3.1-70B-Instruct.toml
+++ b/providers/wandb/models/meta-llama/Llama-3.1-70B-Instruct.toml
@@ -1,7 +1,7 @@
 name = "Llama 3.1 70B"
 family = "llama"
 release_date = "2024-07-23"
-last_updated = "2026-03-12"
+last_updated = "2026-05-19"
 attachment = false
 reasoning = false
 structured_output = true
@@ -12,6 +12,7 @@ open_weights = true
 [cost]
 input = 0.8
 output = 0.8
+cache_read = 0.8
 
 [limit]
 context = 128_000
diff --git a/providers/wandb/models/meta-llama/Llama-3.1-8B-Instruct.toml b/providers/wandb/models/meta-llama/Llama-3.1-8B-Instruct.toml
index 6df9cd97d..12e608e07 100644
--- a/providers/wandb/models/meta-llama/Llama-3.1-8B-Instruct.toml
+++ b/providers/wandb/models/meta-llama/Llama-3.1-8B-Instruct.toml
@@ -1,9 +1,9 @@
 name = "Meta-Llama-3.1-8B-Instruct"
 family = "llama"
 release_date = "2024-07-23"
-last_updated = "2026-03-12"
+last_updated = "2026-05-19"
 attachment = false
-reasoning = true
+reasoning = false
 structured_output = true
 temperature = true
 tool_call = true
@@ -13,6 +13,7 @@ open_weights = true
 [cost]
 input = 0.22
 output = 0.22
+cache_read = 0.22
 
 [limit]
 context = 128_000
diff --git a/providers/wandb/models/meta-llama/Llama-3.3-70B-Instruct.toml b/providers/wandb/models/meta-llama/Llama-3.3-70B-Instruct.toml
index 613be288a..7db3a7da2 100644
--- a/providers/wandb/models/meta-llama/Llama-3.3-70B-Instruct.toml
+++ b/providers/wandb/models/meta-llama/Llama-3.3-70B-Instruct.toml
@@ -1,9 +1,9 @@
 name = "Llama-3.3-70B-Instruct"
 family = "llama"
 release_date = "2024-12-06"
-last_updated = "2026-03-12"
+last_updated = "2026-05-19"
 attachment = false
-reasoning = true
+reasoning = false
 structured_output = true
 temperature = true
 tool_call = true
@@ -13,6 +13,7 @@ open_weights = true
 [cost]
 input = 0.71
 output = 0.71
+cache_read = 0.71
 
 [limit]
 context = 128_000
diff --git a/providers/wandb/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml b/providers/wandb/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml
index fd0885dd3..61c3ded54 100644
--- a/providers/wandb/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml
+++ b/providers/wandb/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml
@@ -1,13 +1,13 @@
 name = "Llama 4 Scout 17B 16E Instruct"
 family = "llama"
-release_date = "2025-01-31"
-last_updated = "2026-03-12"
-attachment = false
-reasoning = true
+release_date = "2025-04-05"
+last_updated = "2026-05-19"
+attachment = true
+reasoning = false
 structured_output = true
 temperature = true
 tool_call = true
-knowledge = "2024-12"
+knowledge = "2024-08"
 open_weights = true
 
 [cost]
diff --git a/providers/wandb/models/microsoft/Phi-4-mini-instruct.toml b/providers/wandb/models/microsoft/Phi-4-mini-instruct.toml
index 6cc415f08..4f33bfac4 100644
--- a/providers/wandb/models/microsoft/Phi-4-mini-instruct.toml
+++ b/providers/wandb/models/microsoft/Phi-4-mini-instruct.toml
@@ -1,9 +1,9 @@
 name = "Phi-4-mini-instruct"
 family = "phi"
 release_date = "2024-12-11"
-last_updated = "2026-03-12"
+last_updated = "2026-05-19"
 attachment = false
-reasoning = true
+reasoning = false
 structured_output = true
 temperature = true
 tool_call = true
@@ -13,6 +13,7 @@ open_weights = true
 [cost]
 input = 0.08
 output = 0.35
+cache_read = 0.08
 
 [limit]
 context = 128_000
diff --git a/providers/wandb/models/moonshotai/Kimi-K2.5.toml b/providers/wandb/models/moonshotai/Kimi-K2.5.toml
index e7ffb5cb3..2fde70961 100644
--- a/providers/wandb/models/moonshotai/Kimi-K2.5.toml
+++ b/providers/wandb/models/moonshotai/Kimi-K2.5.toml
@@ -1,7 +1,7 @@
 name = "Kimi K2.5"
 family = "kimi"
 release_date = "2026-01-27"
-last_updated = "2026-03-12"
+last_updated = "2026-05-19"
 attachment = true
 reasoning = true
 structured_output = true
@@ -9,6 +9,8 @@ temperature = true
 tool_call = true
 open_weights = true
 
+interleaved = true
+
 [cost]
 input = 0.5
 output = 2.85
@@ -20,6 +22,3 @@ output = 262_144
 [modalities]
 input = ["text", "image"]
 output = ["text"]
-
-[interleaved]
-field = "reasoning_content"
diff --git a/providers/wandb/models/moonshotai/Kimi-K2.6.toml b/providers/wandb/models/moonshotai/Kimi-K2.6.toml
new file mode 100644
index 000000000..a9eb09a26
--- /dev/null
+++ b/providers/wandb/models/moonshotai/Kimi-K2.6.toml
@@ -0,0 +1,23 @@
+name = "Kimi K2.6"
+family = "kimi-k2.6"
+release_date = "2026-04-20"
+last_updated = "2026-05-19"
+attachment = true
+reasoning = true
+structured_output = true
+temperature = true
+tool_call = true
+open_weights = true
+
+[cost]
+input = 0.95
+output = 4
+cache_read = 0.16
+
+[limit]
+context = 262_144
+output = 262_144
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
diff --git a/providers/wandb/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8.toml b/providers/wandb/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8.toml
index bdaef3f8c..001d64162 100644
--- a/providers/wandb/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8.toml
+++ b/providers/wandb/models/nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-FP8.toml
@@ -1,9 +1,9 @@
 name = "NVIDIA Nemotron 3 Super 120B"
 family = "nemotron"
 release_date = "2026-03-11"
-last_updated = "2026-03-12"
+last_updated = "2026-05-19"
 attachment = false
-reasoning = false
+reasoning = true
 structured_output = true
 temperature = true
 tool_call = true
@@ -12,6 +12,7 @@ open_weights = true
 [cost]
 input = 0.2
 output = 0.8
+cache_read = 0.2
 
 [limit]
 context = 262_144
diff --git a/providers/wandb/models/openai/gpt-oss-120b.toml b/providers/wandb/models/openai/gpt-oss-120b.toml
index c87249838..f4f5fca7b 100644
--- a/providers/wandb/models/openai/gpt-oss-120b.toml
+++ b/providers/wandb/models/openai/gpt-oss-120b.toml
@@ -1,17 +1,18 @@
 name = "gpt-oss-120b"
 family = "gpt-oss"
 release_date = "2025-08-05"
-last_updated = "2026-03-12"
+last_updated = "2026-05-19"
 attachment = false
-reasoning = false
+reasoning = true
 structured_output = true
 temperature = true
 tool_call = true
-open_weights = false
+open_weights = true
 
 [cost]
 input = 0.15
 output = 0.6
+cache_read = 0.15
 
 [limit]
 context = 131_072
diff --git a/providers/wandb/models/openai/gpt-oss-20b.toml b/providers/wandb/models/openai/gpt-oss-20b.toml
index 3768e3a41..e7bbd8be3 100644
--- a/providers/wandb/models/openai/gpt-oss-20b.toml
+++ b/providers/wandb/models/openai/gpt-oss-20b.toml
@@ -1,17 +1,18 @@
 name = "gpt-oss-20b"
 family = "gpt-oss"
 release_date = "2025-08-05"
-last_updated = "2026-03-12"
+last_updated = "2026-05-19"
 attachment = false
-reasoning = false
+reasoning = true
 structured_output = true
 temperature = true
 tool_call = true
-open_weights = false
+open_weights = true
 
 [cost]
 input = 0.05
 output = 0.2
+cache_read = 0.05
 
 [limit]
 context = 131_072
diff --git a/providers/wandb/models/zai-org/GLM-5.1.toml b/providers/wandb/models/zai-org/GLM-5.1.toml
index d79f6acb9..72a69a42f 100644
--- a/providers/wandb/models/zai-org/GLM-5.1.toml
+++ b/providers/wandb/models/zai-org/GLM-5.1.toml
@@ -1,2 +1,26 @@
-[extends]
-from = "zai/glm-5.1"
\ No newline at end of file
+name = "GLM-5.1"
+family = "glm"
+release_date = "2026-03-27"
+last_updated = "2026-05-19"
+attachment = false
+reasoning = true
+structured_output = true
+temperature = true
+tool_call = true
+open_weights = false
+
+interleaved = true
+
+[cost]
+input = 1.4
+output = 4.4
+cache_read = 0.26
+cache_write = 0
+
+[limit]
+context = 200_000
+output = 131_072
+
+[modalities]
+input = ["text"]
+output = ["text"]