refactor: extract shared install and embedding utilities

2026-02-18 04:48:30 +00:00
parent 4d3403b7ac
commit 8a9fddedc9
7 changed files with 247 additions and 257 deletions
--- a/src/memory/manager-embedding-ops.ts
+++ b/src/memory/manager-embedding-ops.ts
@@ -366,40 +366,49 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
    };
  }

-  private async embedChunksWithVoyageBatch(
-    chunks: MemoryChunk[],
-    entry: MemoryFileEntry | SessionFileEntry,
-    source: MemorySource,
-  ): Promise<number[][]> {
-    const voyage = this.voyage;
-    if (!voyage) {
-      return this.embedChunksInBatches(chunks);
+  private async embedChunksWithProviderBatch<TRequest extends { custom_id: string }>(params: {
+    chunks: MemoryChunk[];
+    entry: MemoryFileEntry | SessionFileEntry;
+    source: MemorySource;
+    provider: "voyage" | "openai" | "gemini";
+    enabled: boolean;
+    buildRequest: (chunk: MemoryChunk) => Omit<TRequest, "custom_id">;
+    runBatch: (runnerOptions: {
+      agentId: string;
+      requests: TRequest[];
+      wait: boolean;
+      concurrency: number;
+      pollIntervalMs: number;
+      timeoutMs: number;
+      debug: (message: string, data?: Record<string, unknown>) => void;
+    }) => Promise<Map<string, number[]> | number[][]>;
+  }): Promise<number[][]> {
+    if (!params.enabled) {
+      return this.embedChunksInBatches(params.chunks);
    }
-    if (chunks.length === 0) {
+    if (params.chunks.length === 0) {
      return [];
    }
-    const { embeddings, missing } = this.collectCachedEmbeddings(chunks);
+    const { embeddings, missing } = this.collectCachedEmbeddings(params.chunks);
    if (missing.length === 0) {
      return embeddings;
    }

-    const { requests, mapping } = this.buildBatchRequests<VoyageBatchRequest>({
+    const { requests, mapping } = this.buildBatchRequests<TRequest>({
      missing,
-      entry,
-      source,
-      build: (chunk) => ({
-        body: { input: chunk.text },
-      }),
+      entry: params.entry,
+      source: params.source,
+      build: params.buildRequest,
+    });
+    const runnerOptions = this.buildEmbeddingBatchRunnerOptions({
+      requests,
+      chunks: params.chunks,
+      source: params.source,
    });
-    const runnerOptions = this.buildEmbeddingBatchRunnerOptions({ requests, chunks, source });
    const batchResult = await this.runBatchWithFallback({
-      provider: "voyage",
-      run: async () =>
-        await runVoyageEmbeddingBatches({
-          client: voyage,
-          ...runnerOptions,
-        }),
-      fallback: async () => await this.embedChunksInBatches(chunks),
+      provider: params.provider,
+      run: async () => await params.runBatch(runnerOptions),
+      fallback: async () => await this.embedChunksInBatches(params.chunks),
    });
    if (Array.isArray(batchResult)) {
      return batchResult;
@@ -408,51 +417,55 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
    return embeddings;
  }

+  private async embedChunksWithVoyageBatch(
+    chunks: MemoryChunk[],
+    entry: MemoryFileEntry | SessionFileEntry,
+    source: MemorySource,
+  ): Promise<number[][]> {
+    const voyage = this.voyage;
+    return await this.embedChunksWithProviderBatch<VoyageBatchRequest>({
+      chunks,
+      entry,
+      source,
+      provider: "voyage",
+      enabled: Boolean(voyage),
+      buildRequest: (chunk) => ({
+        body: { input: chunk.text },
+      }),
+      runBatch: async (runnerOptions) =>
+        await runVoyageEmbeddingBatches({
+          client: voyage!,
+          ...runnerOptions,
+        }),
+    });
+  }
+
  private async embedChunksWithOpenAiBatch(
    chunks: MemoryChunk[],
    entry: MemoryFileEntry | SessionFileEntry,
    source: MemorySource,
  ): Promise<number[][]> {
    const openAi = this.openAi;
-    if (!openAi) {
-      return this.embedChunksInBatches(chunks);
-    }
-    if (chunks.length === 0) {
-      return [];
-    }
-    const { embeddings, missing } = this.collectCachedEmbeddings(chunks);
-    if (missing.length === 0) {
-      return embeddings;
-    }
-
-    const { requests, mapping } = this.buildBatchRequests<OpenAiBatchRequest>({
-      missing,
+    return await this.embedChunksWithProviderBatch<OpenAiBatchRequest>({
+      chunks,
      entry,
      source,
-      build: (chunk) => ({
+      provider: "openai",
+      enabled: Boolean(openAi),
+      buildRequest: (chunk) => ({
        method: "POST",
        url: OPENAI_BATCH_ENDPOINT,
        body: {
-          model: this.openAi?.model ?? this.provider?.model ?? "text-embedding-3-small",
+          model: openAi?.model ?? this.provider?.model ?? "text-embedding-3-small",
          input: chunk.text,
        },
      }),
-    });
-    const runnerOptions = this.buildEmbeddingBatchRunnerOptions({ requests, chunks, source });
-    const batchResult = await this.runBatchWithFallback({
-      provider: "openai",
-      run: async () =>
+      runBatch: async (runnerOptions) =>
        await runOpenAiEmbeddingBatches({
-          openAi,
+          openAi: openAi!,
          ...runnerOptions,
        }),
-      fallback: async () => await this.embedChunksInBatches(chunks),
    });
-    if (Array.isArray(batchResult)) {
-      return batchResult;
-    }
-    this.applyBatchEmbeddings({ byCustomId: batchResult, mapping, embeddings });
-    return embeddings;
  }

  private async embedChunksWithGeminiBatch(
@@ -461,42 +474,22 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
    source: MemorySource,
  ): Promise<number[][]> {
    const gemini = this.gemini;
-    if (!gemini) {
-      return this.embedChunksInBatches(chunks);
-    }
-    if (chunks.length === 0) {
-      return [];
-    }
-    const { embeddings, missing } = this.collectCachedEmbeddings(chunks);
-    if (missing.length === 0) {
-      return embeddings;
-    }
-
-    const { requests, mapping } = this.buildBatchRequests<GeminiBatchRequest>({
-      missing,
+    return await this.embedChunksWithProviderBatch<GeminiBatchRequest>({
+      chunks,
      entry,
      source,
-      build: (chunk) => ({
+      provider: "gemini",
+      enabled: Boolean(gemini),
+      buildRequest: (chunk) => ({
        content: { parts: [{ text: chunk.text }] },
        taskType: "RETRIEVAL_DOCUMENT",
      }),
-    });
-    const runnerOptions = this.buildEmbeddingBatchRunnerOptions({ requests, chunks, source });
-
-    const batchResult = await this.runBatchWithFallback({
-      provider: "gemini",
-      run: async () =>
+      runBatch: async (runnerOptions) =>
        await runGeminiEmbeddingBatches({
-          gemini,
+          gemini: gemini!,
          ...runnerOptions,
        }),
-      fallback: async () => await this.embedChunksInBatches(chunks),
    });
-    if (Array.isArray(batchResult)) {
-      return batchResult;
-    }
-    this.applyBatchEmbeddings({ byCustomId: batchResult, mapping, embeddings });
-    return embeddings;
  }

  protected async embedBatchWithRetry(texts: string[]): Promise<number[][]> {