refactor: share openai-compatible local discovery

This commit is contained in:
Peter Steinberger
2026-03-12 22:26:41 +00:00
parent 9692dc7668
commit 7fd4dea1af

View File

@@ -31,33 +31,20 @@ const log = createSubsystemLogger("agents/model-providers");
const OLLAMA_SHOW_CONCURRENCY = 8; const OLLAMA_SHOW_CONCURRENCY = 8;
const OLLAMA_SHOW_MAX_MODELS = 200; const OLLAMA_SHOW_MAX_MODELS = 200;
const SGLANG_BASE_URL = "http://127.0.0.1:30000/v1"; const OPENAI_COMPAT_LOCAL_DEFAULT_CONTEXT_WINDOW = 128000;
const SGLANG_DEFAULT_CONTEXT_WINDOW = 128000; const OPENAI_COMPAT_LOCAL_DEFAULT_MAX_TOKENS = 8192;
const SGLANG_DEFAULT_MAX_TOKENS = 8192; const OPENAI_COMPAT_LOCAL_DEFAULT_COST = {
const SGLANG_DEFAULT_COST = {
input: 0, input: 0,
output: 0, output: 0,
cacheRead: 0, cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
}; };
const SGLANG_BASE_URL = "http://127.0.0.1:30000/v1";
const VLLM_BASE_URL = "http://127.0.0.1:8000/v1"; const VLLM_BASE_URL = "http://127.0.0.1:8000/v1";
const VLLM_DEFAULT_CONTEXT_WINDOW = 128000;
const VLLM_DEFAULT_MAX_TOKENS = 8192;
const VLLM_DEFAULT_COST = {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
};
type VllmModelsResponse = { type OpenAICompatModelsResponse = {
data?: Array<{
id?: string;
}>;
};
type SglangModelsResponse = {
data?: Array<{ data?: Array<{
id?: string; id?: string;
}>; }>;
@@ -112,31 +99,34 @@ async function discoverOllamaModels(
} }
} }
async function discoverVllmModels( async function discoverOpenAICompatibleLocalModels(params: {
baseUrl: string, baseUrl: string;
apiKey?: string, apiKey?: string;
): Promise<ModelDefinitionConfig[]> { label: string;
contextWindow?: number;
maxTokens?: number;
}): Promise<ModelDefinitionConfig[]> {
if (process.env.VITEST || process.env.NODE_ENV === "test") { if (process.env.VITEST || process.env.NODE_ENV === "test") {
return []; return [];
} }
const trimmedBaseUrl = baseUrl.trim().replace(/\/+$/, ""); const trimmedBaseUrl = params.baseUrl.trim().replace(/\/+$/, "");
const url = `${trimmedBaseUrl}/models`; const url = `${trimmedBaseUrl}/models`;
try { try {
const trimmedApiKey = apiKey?.trim(); const trimmedApiKey = params.apiKey?.trim();
const response = await fetch(url, { const response = await fetch(url, {
headers: trimmedApiKey ? { Authorization: `Bearer ${trimmedApiKey}` } : undefined, headers: trimmedApiKey ? { Authorization: `Bearer ${trimmedApiKey}` } : undefined,
signal: AbortSignal.timeout(5000), signal: AbortSignal.timeout(5000),
}); });
if (!response.ok) { if (!response.ok) {
log.warn(`Failed to discover vLLM models: ${response.status}`); log.warn(`Failed to discover ${params.label} models: ${response.status}`);
return []; return [];
} }
const data = (await response.json()) as VllmModelsResponse; const data = (await response.json()) as OpenAICompatModelsResponse;
const models = data.data ?? []; const models = data.data ?? [];
if (models.length === 0) { if (models.length === 0) {
log.warn("No vLLM models found on local instance"); log.warn(`No ${params.label} models found on local instance`);
return []; return [];
} }
@@ -150,62 +140,13 @@ async function discoverVllmModels(
name: modelId, name: modelId,
reasoning: isReasoningModelHeuristic(modelId), reasoning: isReasoningModelHeuristic(modelId),
input: ["text"], input: ["text"],
cost: VLLM_DEFAULT_COST, cost: OPENAI_COMPAT_LOCAL_DEFAULT_COST,
contextWindow: VLLM_DEFAULT_CONTEXT_WINDOW, contextWindow: params.contextWindow ?? OPENAI_COMPAT_LOCAL_DEFAULT_CONTEXT_WINDOW,
maxTokens: VLLM_DEFAULT_MAX_TOKENS, maxTokens: params.maxTokens ?? OPENAI_COMPAT_LOCAL_DEFAULT_MAX_TOKENS,
} satisfies ModelDefinitionConfig; } satisfies ModelDefinitionConfig;
}); });
} catch (error) { } catch (error) {
log.warn(`Failed to discover vLLM models: ${String(error)}`); log.warn(`Failed to discover ${params.label} models: ${String(error)}`);
return [];
}
}
async function discoverSglangModels(
baseUrl: string,
apiKey?: string,
): Promise<ModelDefinitionConfig[]> {
if (process.env.VITEST || process.env.NODE_ENV === "test") {
return [];
}
const trimmedBaseUrl = baseUrl.trim().replace(/\/+$/, "");
const url = `${trimmedBaseUrl}/models`;
try {
const trimmedApiKey = apiKey?.trim();
const response = await fetch(url, {
headers: trimmedApiKey ? { Authorization: `Bearer ${trimmedApiKey}` } : undefined,
signal: AbortSignal.timeout(5000),
});
if (!response.ok) {
log.warn(`Failed to discover SGLang models: ${response.status}`);
return [];
}
const data = (await response.json()) as SglangModelsResponse;
const models = data.data ?? [];
if (models.length === 0) {
log.warn("No SGLang models found on local instance");
return [];
}
return models
.map((model) => ({ id: typeof model.id === "string" ? model.id.trim() : "" }))
.filter((model) => Boolean(model.id))
.map((model) => {
const modelId = model.id;
return {
id: modelId,
name: modelId,
reasoning: isReasoningModelHeuristic(modelId),
input: ["text"],
cost: SGLANG_DEFAULT_COST,
contextWindow: SGLANG_DEFAULT_CONTEXT_WINDOW,
maxTokens: SGLANG_DEFAULT_MAX_TOKENS,
} satisfies ModelDefinitionConfig;
});
} catch (error) {
log.warn(`Failed to discover SGLang models: ${String(error)}`);
return []; return [];
} }
} }
@@ -257,7 +198,11 @@ export async function buildVllmProvider(params?: {
apiKey?: string; apiKey?: string;
}): Promise<ProviderConfig> { }): Promise<ProviderConfig> {
const baseUrl = (params?.baseUrl?.trim() || VLLM_BASE_URL).replace(/\/+$/, ""); const baseUrl = (params?.baseUrl?.trim() || VLLM_BASE_URL).replace(/\/+$/, "");
const models = await discoverVllmModels(baseUrl, params?.apiKey); const models = await discoverOpenAICompatibleLocalModels({
baseUrl,
apiKey: params?.apiKey,
label: "vLLM",
});
return { return {
baseUrl, baseUrl,
api: "openai-completions", api: "openai-completions",
@@ -270,7 +215,11 @@ export async function buildSglangProvider(params?: {
apiKey?: string; apiKey?: string;
}): Promise<ProviderConfig> { }): Promise<ProviderConfig> {
const baseUrl = (params?.baseUrl?.trim() || SGLANG_BASE_URL).replace(/\/+$/, ""); const baseUrl = (params?.baseUrl?.trim() || SGLANG_BASE_URL).replace(/\/+$/, "");
const models = await discoverSglangModels(baseUrl, params?.apiKey); const models = await discoverOpenAICompatibleLocalModels({
baseUrl,
apiKey: params?.apiKey,
label: "SGLang",
});
return { return {
baseUrl, baseUrl,
api: "openai-completions", api: "openai-completions",