* Docs: refresh Venice default model guidance * Venice: switch default model to Kimi K2.5 * Changelog: credit Venice default refresh
710 lines
18 KiB
TypeScript
710 lines
18 KiB
TypeScript
import type { ModelDefinitionConfig } from "../config/types.js";
|
|
import { retryAsync } from "../infra/retry.js";
|
|
import { createSubsystemLogger } from "../logging/subsystem.js";
|
|
|
|
// Subsystem logger; discovery failures in this module are reported through it as warnings.
const log = createSubsystemLogger("venice-models");
|
|
|
|
/** Root of the Venice REST API; model discovery requests `${VENICE_BASE_URL}/models`. */
export const VENICE_BASE_URL = "https://api.venice.ai/api/v1";

/** Catalog id of the model used by default for the Venice provider. */
export const VENICE_DEFAULT_MODEL_ID = "kimi-k2-5";

/** Default model reference: the default model id prefixed with `venice/`. */
export const VENICE_DEFAULT_MODEL_REF = `venice/${VENICE_DEFAULT_MODEL_ID}`;
|
|
|
|
/**
 * Cost placeholder attached to every Venice model definition.
 *
 * Venice uses credit-based pricing rather than per-token costs, and the
 * effective cost varies by model and account type, so every rate is 0.
 */
export const VENICE_DEFAULT_COST = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
|
|
|
|
// Fallback context window for discovered models that do not report one.
const VENICE_DEFAULT_CONTEXT_WINDOW = 128_000;

// Fallback completion limit for discovered models that do not report one.
const VENICE_DEFAULT_MAX_TOKENS = 4096;

// Upper bound applied to API-reported completion limits when the catalog has no known value.
const VENICE_DISCOVERY_HARD_MAX_TOKENS = 131_072;

// Per-attempt timeout for the discovery request, in milliseconds.
const VENICE_DISCOVERY_TIMEOUT_MS = 10_000;

// HTTP statuses for which a discovery attempt is retried.
const VENICE_DISCOVERY_RETRYABLE_HTTP_STATUS = new Set([
  408, 425, 429,
  500, 502, 503, 504,
]);

// Error `code`/`errno` strings for which a discovery attempt is retried.
const VENICE_DISCOVERY_RETRYABLE_NETWORK_CODES = new Set([
  // System-level socket / DNS error codes
  "ECONNABORTED",
  "ECONNREFUSED",
  "ECONNRESET",
  "EAI_AGAIN",
  "ENETDOWN",
  "ENETUNREACH",
  "ENOTFOUND",
  "ETIMEDOUT",
  // UND_ERR_* codes (the style reported by Node's undici-based fetch)
  "UND_ERR_BODY_TIMEOUT",
  "UND_ERR_CONNECT_TIMEOUT",
  "UND_ERR_CONNECT_ERROR",
  "UND_ERR_HEADERS_TIMEOUT",
  "UND_ERR_SOCKET",
]);
|
|
|
|
/**
 * Static catalog of Venice AI models.
 *
 * Venice provides two privacy modes:
 * - "private": fully private inference, no logging, ephemeral
 * - "anonymized": proxied through Venice with metadata stripped (for proprietary models)
 *
 * The `privacy` field is kept for documentation purposes only. It is not
 * propagated to ModelDefinitionConfig because it is not part of the core model
 * schema; privacy mode is determined by the model itself and is not
 * configurable at runtime.
 *
 * Entries that set `supportsTools: false` have tool calling disabled in the
 * generated model definition.
 *
 * This catalog also serves as the fallback when the Venice API is unreachable.
 */
export const VENICE_MODEL_CATALOG = [
  // ---------------------------------------------------------------
  // PRIVATE MODELS (fully private, no logging)
  // ---------------------------------------------------------------

  // Llama models
  { id: "llama-3.3-70b", name: "Llama 3.3 70B", reasoning: false, input: ["text"],
    contextWindow: 128_000, maxTokens: 4_096, privacy: "private" },
  { id: "llama-3.2-3b", name: "Llama 3.2 3B", reasoning: false, input: ["text"],
    contextWindow: 128_000, maxTokens: 4_096, privacy: "private" },
  { id: "hermes-3-llama-3.1-405b", name: "Hermes 3 Llama 3.1 405B", reasoning: false, input: ["text"],
    contextWindow: 128_000, maxTokens: 16_384, supportsTools: false, privacy: "private" },

  // Qwen models
  { id: "qwen3-235b-a22b-thinking-2507", name: "Qwen3 235B Thinking", reasoning: true, input: ["text"],
    contextWindow: 128_000, maxTokens: 16_384, privacy: "private" },
  { id: "qwen3-235b-a22b-instruct-2507", name: "Qwen3 235B Instruct", reasoning: false, input: ["text"],
    contextWindow: 128_000, maxTokens: 16_384, privacy: "private" },
  { id: "qwen3-coder-480b-a35b-instruct", name: "Qwen3 Coder 480B", reasoning: false, input: ["text"],
    contextWindow: 256_000, maxTokens: 65_536, privacy: "private" },
  { id: "qwen3-coder-480b-a35b-instruct-turbo", name: "Qwen3 Coder 480B Turbo", reasoning: false, input: ["text"],
    contextWindow: 256_000, maxTokens: 65_536, privacy: "private" },
  { id: "qwen3-5-35b-a3b", name: "Qwen3.5 35B A3B", reasoning: true, input: ["text", "image"],
    contextWindow: 256_000, maxTokens: 65_536, privacy: "private" },
  { id: "qwen3-next-80b", name: "Qwen3 Next 80B", reasoning: false, input: ["text"],
    contextWindow: 256_000, maxTokens: 16_384, privacy: "private" },
  { id: "qwen3-vl-235b-a22b", name: "Qwen3 VL 235B (Vision)", reasoning: false, input: ["text", "image"],
    contextWindow: 256_000, maxTokens: 16_384, privacy: "private" },
  { id: "qwen3-4b", name: "Venice Small (Qwen3 4B)", reasoning: true, input: ["text"],
    contextWindow: 32_000, maxTokens: 4_096, privacy: "private" },

  // DeepSeek
  { id: "deepseek-v3.2", name: "DeepSeek V3.2", reasoning: true, input: ["text"],
    contextWindow: 160_000, maxTokens: 32_768, supportsTools: false, privacy: "private" },

  // Venice-specific models
  { id: "venice-uncensored", name: "Venice Uncensored (Dolphin-Mistral)", reasoning: false, input: ["text"],
    contextWindow: 32_000, maxTokens: 4_096, supportsTools: false, privacy: "private" },
  { id: "mistral-31-24b", name: "Venice Medium (Mistral)", reasoning: false, input: ["text", "image"],
    contextWindow: 128_000, maxTokens: 4_096, privacy: "private" },

  // Other private models
  { id: "google-gemma-3-27b-it", name: "Google Gemma 3 27B Instruct", reasoning: false, input: ["text", "image"],
    contextWindow: 198_000, maxTokens: 16_384, privacy: "private" },
  { id: "openai-gpt-oss-120b", name: "OpenAI GPT OSS 120B", reasoning: false, input: ["text"],
    contextWindow: 128_000, maxTokens: 16_384, privacy: "private" },
  { id: "nvidia-nemotron-3-nano-30b-a3b", name: "NVIDIA Nemotron 3 Nano 30B", reasoning: false, input: ["text"],
    contextWindow: 128_000, maxTokens: 16_384, privacy: "private" },
  { id: "olafangensan-glm-4.7-flash-heretic", name: "GLM 4.7 Flash Heretic", reasoning: true, input: ["text"],
    contextWindow: 128_000, maxTokens: 24_000, privacy: "private" },
  { id: "zai-org-glm-4.6", name: "GLM 4.6", reasoning: false, input: ["text"],
    contextWindow: 198_000, maxTokens: 16_384, privacy: "private" },
  { id: "zai-org-glm-4.7", name: "GLM 4.7", reasoning: true, input: ["text"],
    contextWindow: 198_000, maxTokens: 16_384, privacy: "private" },
  { id: "zai-org-glm-4.7-flash", name: "GLM 4.7 Flash", reasoning: true, input: ["text"],
    contextWindow: 128_000, maxTokens: 16_384, privacy: "private" },
  { id: "zai-org-glm-5", name: "GLM 5", reasoning: true, input: ["text"],
    contextWindow: 198_000, maxTokens: 32_000, privacy: "private" },
  { id: "kimi-k2-5", name: "Kimi K2.5", reasoning: true, input: ["text", "image"],
    contextWindow: 256_000, maxTokens: 65_536, privacy: "private" },
  { id: "kimi-k2-thinking", name: "Kimi K2 Thinking", reasoning: true, input: ["text"],
    contextWindow: 256_000, maxTokens: 65_536, privacy: "private" },
  { id: "minimax-m21", name: "MiniMax M2.1", reasoning: true, input: ["text"],
    contextWindow: 198_000, maxTokens: 32_768, privacy: "private" },
  { id: "minimax-m25", name: "MiniMax M2.5", reasoning: true, input: ["text"],
    contextWindow: 198_000, maxTokens: 32_768, privacy: "private" },

  // ---------------------------------------------------------------
  // ANONYMIZED MODELS (proxied through Venice)
  // These are proprietary models accessed via Venice's proxy
  // ---------------------------------------------------------------

  // Anthropic (via Venice)
  { id: "claude-opus-4-5", name: "Claude Opus 4.5 (via Venice)", reasoning: true, input: ["text", "image"],
    contextWindow: 198_000, maxTokens: 32_768, privacy: "anonymized" },
  { id: "claude-opus-4-6", name: "Claude Opus 4.6 (via Venice)", reasoning: true, input: ["text", "image"],
    contextWindow: 1_000_000, maxTokens: 128_000, privacy: "anonymized" },
  { id: "claude-sonnet-4-5", name: "Claude Sonnet 4.5 (via Venice)", reasoning: true, input: ["text", "image"],
    contextWindow: 198_000, maxTokens: 64_000, privacy: "anonymized" },
  { id: "claude-sonnet-4-6", name: "Claude Sonnet 4.6 (via Venice)", reasoning: true, input: ["text", "image"],
    contextWindow: 1_000_000, maxTokens: 64_000, privacy: "anonymized" },

  // OpenAI (via Venice)
  { id: "openai-gpt-52", name: "GPT-5.2 (via Venice)", reasoning: true, input: ["text"],
    contextWindow: 256_000, maxTokens: 65_536, privacy: "anonymized" },
  { id: "openai-gpt-52-codex", name: "GPT-5.2 Codex (via Venice)", reasoning: true, input: ["text", "image"],
    contextWindow: 256_000, maxTokens: 65_536, privacy: "anonymized" },
  { id: "openai-gpt-53-codex", name: "GPT-5.3 Codex (via Venice)", reasoning: true, input: ["text", "image"],
    contextWindow: 400_000, maxTokens: 128_000, privacy: "anonymized" },
  { id: "openai-gpt-54", name: "GPT-5.4 (via Venice)", reasoning: true, input: ["text", "image"],
    contextWindow: 1_000_000, maxTokens: 131_072, privacy: "anonymized" },
  { id: "openai-gpt-4o-2024-11-20", name: "GPT-4o (via Venice)", reasoning: false, input: ["text", "image"],
    contextWindow: 128_000, maxTokens: 16_384, privacy: "anonymized" },
  { id: "openai-gpt-4o-mini-2024-07-18", name: "GPT-4o Mini (via Venice)", reasoning: false, input: ["text", "image"],
    contextWindow: 128_000, maxTokens: 16_384, privacy: "anonymized" },

  // Google (via Venice)
  { id: "gemini-3-pro-preview", name: "Gemini 3 Pro (via Venice)", reasoning: true, input: ["text", "image"],
    contextWindow: 198_000, maxTokens: 32_768, privacy: "anonymized" },
  { id: "gemini-3-1-pro-preview", name: "Gemini 3.1 Pro (via Venice)", reasoning: true, input: ["text", "image"],
    contextWindow: 1_000_000, maxTokens: 32_768, privacy: "anonymized" },
  { id: "gemini-3-flash-preview", name: "Gemini 3 Flash (via Venice)", reasoning: true, input: ["text", "image"],
    contextWindow: 256_000, maxTokens: 65_536, privacy: "anonymized" },

  // xAI (via Venice)
  { id: "grok-41-fast", name: "Grok 4.1 Fast (via Venice)", reasoning: true, input: ["text", "image"],
    contextWindow: 1_000_000, maxTokens: 30_000, privacy: "anonymized" },
  { id: "grok-code-fast-1", name: "Grok Code Fast 1 (via Venice)", reasoning: true, input: ["text"],
    contextWindow: 256_000, maxTokens: 10_000, privacy: "anonymized" },
] as const;
|
|
|
|
/** Union of the literal entry shapes declared in VENICE_MODEL_CATALOG. */
export type VeniceCatalogEntry = (typeof VENICE_MODEL_CATALOG)[number];
|
|
|
|
/**
 * Convert a Venice catalog entry into a ModelDefinitionConfig.
 *
 * The catalog's `privacy` field is intentionally dropped: ModelDefinitionConfig
 * doesn't support custom metadata fields, and privacy mode is inherent to each
 * model and documented in the catalog/docs.
 *
 * @param entry - Catalog entry to convert.
 * @returns A new definition carrying the shared Venice cost object, a copy of
 *   the entry's input list, and the Venice compat flags.
 */
export function buildVeniceModelDefinition(entry: VeniceCatalogEntry): ModelDefinitionConfig {
  const { id, name, reasoning, contextWindow, maxTokens } = entry;
  // Only entries that explicitly carry a falsy `supportsTools` opt out of tool use.
  const toolsDisabled = "supportsTools" in entry && !entry.supportsTools;

  return {
    id,
    name,
    reasoning,
    input: [...entry.input],
    cost: VENICE_DEFAULT_COST,
    contextWindow,
    maxTokens,
    // Avoid usage-only streaming chunks that can break OpenAI-compatible parsers.
    // See: https://github.com/openclaw/openclaw/issues/15819
    compat: toolsDisabled
      ? { supportsUsageInStreaming: false, supportsTools: false }
      : { supportsUsageInStreaming: false },
  };
}
|
|
|
|
// Venice API response types

/** Optional capability flags reported inside a model's `model_spec`. */
interface VeniceModelCapabilities {
  supportsReasoning?: boolean;
  supportsVision?: boolean;
  supportsFunctionCalling?: boolean;
}

/** Per-model metadata block of a /models response item. */
interface VeniceModelSpec {
  name: string;
  privacy: "private" | "anonymized";
  availableContextTokens?: number;
  maxCompletionTokens?: number;
  capabilities?: VeniceModelCapabilities;
}

/** Single item of the /models response; `model_spec` may be absent. */
interface VeniceModel {
  id: string;
  model_spec?: VeniceModelSpec;
}

/** Envelope of the /models response. */
interface VeniceModelsResponse {
  data: VeniceModel[];
}
|
|
|
|
/**
 * Error thrown by a discovery attempt when the response status is one of the
 * retryable HTTP statuses. Carries the status so the final fallback warning
 * can report it.
 */
class VeniceDiscoveryHttpError extends Error {
  /** HTTP status code of the failed response. */
  readonly status: number;

  constructor(status: number) {
    const message = `HTTP ${status}`;
    super(message);
    this.name = "VeniceDiscoveryHttpError";
    this.status = status;
  }
}
|
|
|
|
/**
 * Build model definitions for every entry of the static catalog.
 *
 * @returns A freshly built array, one definition per catalog entry, in catalog order.
 */
function staticVeniceModelDefinitions(): ModelDefinitionConfig[] {
  return VENICE_MODEL_CATALOG.map((entry) => buildVeniceModelDefinition(entry));
}
|
|
|
|
/**
 * Report whether an error, or anything reachable from it through `cause` or an
 * `errors` array, carries a retryable network error code.
 *
 * Each object is inspected at most once, so cyclic cause chains terminate.
 * The code is read from a string `code`, falling back to a string `errno`.
 *
 * @param err - Value to inspect; non-objects are ignored.
 * @returns true when a retryable code is found anywhere in the graph.
 */
function hasRetryableNetworkCode(err: unknown): boolean {
  const pending: unknown[] = [err];
  const visited = new Set<unknown>();

  // The list grows while it is walked, which gives breadth-first order.
  for (let cursor = 0; cursor < pending.length; cursor += 1) {
    const node = pending[cursor];
    if (!node || typeof node !== "object" || visited.has(node)) {
      continue;
    }
    visited.add(node);

    const candidate = node as {
      cause?: unknown;
      errors?: unknown;
      code?: unknown;
      errno?: unknown;
    };

    let code: string | undefined;
    if (typeof candidate.code === "string") {
      code = candidate.code;
    } else if (typeof candidate.errno === "string") {
      code = candidate.errno;
    }
    if (code && VENICE_DISCOVERY_RETRYABLE_NETWORK_CODES.has(code)) {
      return true;
    }

    if (candidate.cause) {
      pending.push(candidate.cause);
    }
    if (Array.isArray(candidate.errors)) {
      pending.push(...candidate.errors);
    }
  }

  return false;
}
|
|
|
|
/**
 * Decide whether a failed discovery attempt should be retried.
 *
 * Retryable failures are:
 * - a VeniceDiscoveryHttpError (only thrown for retryable HTTP statuses),
 * - an aborted or timed-out request,
 * - the generic "fetch failed" TypeError,
 * - any error whose cause/errors graph carries a retryable network code.
 *
 * @param err - Error thrown by a discovery attempt.
 * @returns true when another attempt is worthwhile.
 */
function isRetryableVeniceDiscoveryError(err: unknown): boolean {
  if (err instanceof VeniceDiscoveryHttpError) {
    return true;
  }
  // The discovery request is bounded with AbortSignal.timeout(), which aborts
  // with an error named "TimeoutError" rather than "AbortError". Accept both so
  // the request's own timeout is treated as transient.
  if (err instanceof Error && (err.name === "AbortError" || err.name === "TimeoutError")) {
    return true;
  }
  if (err instanceof TypeError && err.message.toLowerCase() === "fetch failed") {
    return true;
  }
  return hasRetryableNetworkCode(err);
}
|
|
|
|
/**
 * Coerce an untrusted value into a positive integer.
 *
 * @param value - Candidate value, typically a field from an API response.
 * @returns The value floored to an integer, or undefined when it is not a
 *   finite number or when the floored result is not at least 1.
 */
function normalizePositiveInt(value: unknown): number | undefined {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return undefined;
  }
  // Floor before the positivity check: a fraction such as 0.5 is greater than
  // zero but floors to 0, which must not be reported as a positive integer.
  const floored = Math.floor(value);
  return floored > 0 ? floored : undefined;
}
|
|
|
|
/**
 * Derive a completion-token limit from the API's advertised value, clamped to
 * limits this module trusts.
 *
 * The advertised `maxCompletionTokens` is bounded by:
 * - the advertised context window, or, when that is missing, the catalog's
 *   known limit, or, failing that, the default context window;
 * - the catalog's known limit when provided, otherwise the discovery hard cap.
 *
 * @param params.apiModel - Model item from the /models response.
 * @param params.knownMaxTokens - Catalog limit for the same model, if any.
 * @returns The clamped limit, or undefined when the API reports no usable value.
 */
function resolveApiMaxCompletionTokens(params: {
  apiModel: VeniceModel;
  knownMaxTokens?: number;
}): number | undefined {
  const { apiModel, knownMaxTokens } = params;
  const spec = apiModel.model_spec;

  const advertised = normalizePositiveInt(spec?.maxCompletionTokens);
  if (!advertised) {
    return undefined;
  }

  const advertisedContext = normalizePositiveInt(spec?.availableContextTokens);
  const catalogLimit =
    typeof knownMaxTokens === "number" && Number.isFinite(knownMaxTokens)
      ? Math.floor(knownMaxTokens)
      : undefined;

  const contextCeiling = advertisedContext ?? catalogLimit ?? VENICE_DEFAULT_CONTEXT_WINDOW;
  const outputCeiling = catalogLimit ?? VENICE_DISCOVERY_HARD_MAX_TOKENS;
  return Math.min(advertised, contextCeiling, outputCeiling);
}
|
|
|
|
/**
 * Read the function-calling capability flag from an API model item.
 *
 * @param apiModel - Model item from the /models response.
 * @returns The reported boolean, or undefined when the flag is absent or not a boolean.
 */
function resolveApiSupportsTools(apiModel: VeniceModel): boolean | undefined {
  const flag = apiModel.model_spec?.capabilities?.supportsFunctionCalling;
  if (typeof flag !== "boolean") {
    return undefined;
  }
  return flag;
}
|
|
|
|
/**
 * Discover models from the Venice API, falling back to the static catalog.
 *
 * The /models endpoint is public and doesn't require authentication.
 * Each attempt is bounded by the discovery timeout and transient failures are
 * retried. Models that are also in the catalog keep their catalog definition,
 * with the API allowed to adjust `maxTokens` and to disable tool support;
 * models unknown to the catalog get a definition derived from the API data.
 *
 * A non-OK response, an empty payload, or any thrown error logs a warning and
 * yields the static catalog definitions instead.
 *
 * @returns The merged model definitions, or the static catalog on failure.
 */
export async function discoverVeniceModels(): Promise<ModelDefinitionConfig[]> {
  // Skip API discovery in test environment
  if (process.env.NODE_ENV === "test" || process.env.VITEST) {
    return staticVeniceModelDefinitions();
  }

  // One discovery attempt. Throws only for statuses worth retrying; any other
  // response (including other non-OK ones) is handed back to the caller.
  const requestModelList = async () => {
    const attempt = await fetch(`${VENICE_BASE_URL}/models`, {
      signal: AbortSignal.timeout(VENICE_DISCOVERY_TIMEOUT_MS),
      headers: {
        Accept: "application/json",
      },
    });
    if (!attempt.ok && VENICE_DISCOVERY_RETRYABLE_HTTP_STATUS.has(attempt.status)) {
      throw new VeniceDiscoveryHttpError(attempt.status);
    }
    return attempt;
  };

  try {
    const response = await retryAsync(requestModelList, {
      attempts: 3,
      minDelayMs: 300,
      maxDelayMs: 2000,
      jitter: 0.2,
      label: "venice-model-discovery",
      shouldRetry: isRetryableVeniceDiscoveryError,
    });

    if (!response.ok) {
      log.warn(`Failed to discover models: HTTP ${response.status}, using static catalog`);
      return staticVeniceModelDefinitions();
    }

    const payload = (await response.json()) as VeniceModelsResponse;
    if (!Array.isArray(payload.data) || payload.data.length === 0) {
      log.warn("No models found from API, using static catalog");
      return staticVeniceModelDefinitions();
    }

    // Merge discovered models with catalog metadata
    const catalogById = new Map<string, VeniceCatalogEntry>();
    for (const entry of VENICE_MODEL_CATALOG) {
      catalogById.set(entry.id, entry);
    }

    // Id fragments that mark an uncatalogued model as a reasoning model.
    const reasoningIdMarkers = ["thinking", "reason", "r1"];
    const models: ModelDefinitionConfig[] = [];

    for (const apiModel of payload.data) {
      const catalogEntry = catalogById.get(apiModel.id);
      const apiMaxTokens = resolveApiMaxCompletionTokens({
        apiModel,
        knownMaxTokens: catalogEntry?.maxTokens,
      });
      const toolsDisabledByApi = resolveApiSupportsTools(apiModel) === false;

      if (!catalogEntry) {
        // Create definition for newly discovered models not in catalog
        const apiSpec = apiModel.model_spec;
        const isReasoning =
          apiSpec?.capabilities?.supportsReasoning ||
          reasoningIdMarkers.some((marker) => apiModel.id.toLowerCase().includes(marker));
        const hasVision = apiSpec?.capabilities?.supportsVision === true;

        models.push({
          id: apiModel.id,
          name: apiSpec?.name || apiModel.id,
          reasoning: isReasoning,
          input: hasVision ? ["text", "image"] : ["text"],
          cost: VENICE_DEFAULT_COST,
          contextWindow:
            normalizePositiveInt(apiSpec?.availableContextTokens) ?? VENICE_DEFAULT_CONTEXT_WINDOW,
          maxTokens: apiMaxTokens ?? VENICE_DEFAULT_MAX_TOKENS,
          // Avoid usage-only streaming chunks that can break OpenAI-compatible parsers.
          compat: {
            supportsUsageInStreaming: false,
            ...(toolsDisabledByApi ? { supportsTools: false } : {}),
          },
        });
        continue;
      }

      const definition = buildVeniceModelDefinition(catalogEntry);
      if (apiMaxTokens !== undefined) {
        definition.maxTokens = apiMaxTokens;
      }
      // We only let live discovery disable tools. Re-enabling tool support still
      // requires a catalog update so a transient/bad /models response cannot
      // silently expand the tool execution surface for known models.
      if (toolsDisabledByApi) {
        definition.compat = {
          ...definition.compat,
          supportsTools: false,
        };
      }
      models.push(definition);
    }

    return models.length > 0 ? models : staticVeniceModelDefinitions();
  } catch (error) {
    // Retries exhausted or an unexpected failure: warn and use the bundled catalog.
    const warning =
      error instanceof VeniceDiscoveryHttpError
        ? `Failed to discover models: HTTP ${error.status}, using static catalog`
        : `Discovery failed: ${String(error)}, using static catalog`;
    log.warn(warning);
    return staticVeniceModelDefinitions();
  }
}
|